/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/lmgr/proc.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * Interface (a):
 *		ProcSleep(), ProcWakeup(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocing memory
 *
 * Waiting for a lock causes the backend to be put to sleep.  Whoever releases
 * the lock wakes the process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with current transaction
 *
 * ProcKill -- destroys the shared memory state (and locks)
 * associated with the process.
 */
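
/*
 * Caller-side sketch of interface (a): the real caller is WaitOnLock() in
 * lock.c, which (roughly, and omitting its bookkeeping) does
 *
 *		if (ProcSleep(locallock, lockMethodTable) != STATUS_OK)
 *		{
 *			LWLockRelease(LockHashPartitionLock(locallock->hashcode));
 *			DeadLockReport();
 *		}
 *
 * i.e. a non-OK result means CheckDeadLock() detected a hard deadlock, and
 * the caller reports it once the partition lock has been dropped.
 */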
#include "postgres.h"

#include <signal.h>
#include <unistd.h>
#include <sys/time.h>

#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
#include "replication/slot.h"
#include "replication/syncrep.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/procsignal.h"
#include "storage/spin.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"


/* GUC variables */
int			DeadlockTimeout = 1000;
int			StatementTimeout = 0;
int			LockTimeout = 0;
bool		log_lock_waits = false;

/* Pointer to this process's PGPROC and PGXACT structs, if any */
PGPROC	   *MyProc = NULL;
PGXACT	   *MyPgXact = NULL;

/*
 * This spinlock protects the freelist of recycled PGPROC structures.
 * We cannot use an LWLock because the LWLock manager depends on already
 * having a PGPROC and a wait semaphore!  But these structures are touched
 * relatively infrequently (only at backend startup or shutdown) and not for
 * very long, so a spinlock is okay.
 */
NON_EXEC_STATIC slock_t *ProcStructLock = NULL;

/* Pointers to shared-memory structures */
PROC_HDR   *ProcGlobal = NULL;
NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL;
PGPROC	   *PreparedXactProcs = NULL;

/* If we are waiting for a lock, this points to the associated LOCALLOCK */
static LOCALLOCK *lockAwaited = NULL;

static DeadLockState deadlock_state = DS_NOT_YET_CHECKED;

/* Is a deadlock check pending? */
static volatile sig_atomic_t got_deadlock_timeout;

static void RemoveProcFromArray(int code, Datum arg);
static void ProcKill(int code, Datum arg);
static void AuxiliaryProcKill(int code, Datum arg);
static void CheckDeadLock(void);


/*
 * Report shared-memory space needed by InitProcGlobal.
 */
Size
ProcGlobalShmemSize(void)
{
	Size		size = 0;

	/* ProcGlobal */
	size = add_size(size, sizeof(PROC_HDR));
	/* MyProcs, including autovacuum workers and launcher */
	size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC)));
	/* AuxiliaryProcs */
	size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC)));
	/* Prepared xacts */
	size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGPROC)));
	/* ProcStructLock */
	size = add_size(size, sizeof(slock_t));

	size = add_size(size, mul_size(MaxBackends, sizeof(PGXACT)));
	size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGXACT)));
	size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGXACT)));

	return size;
}
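
/*
 * For reference, this size is folded into the total shared-memory request
 * by CreateSharedMemoryAndSemaphores() in ipci.c, roughly as
 *
 *		size = add_size(size, ProcGlobalShmemSize());
 *
 * (simplified sketch of that caller).
 */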

/*
 * Report number of semaphores needed by InitProcGlobal.
 */
int
ProcGlobalSemas(void)
{
	/*
	 * We need a sema per backend (including autovacuum), plus one for each
	 * auxiliary process.
	 */
	return MaxBackends + NUM_AUXILIARY_PROCS;
}

/*
 * InitProcGlobal -
 *	  Initialize the global process table during postmaster or standalone
 *	  backend startup.
 *
 *	  We also create all the per-process semaphores we will need to support
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxConnections, max_worker_processes, or autovacuum_max_workers higher
 *	  than his kernel will support, he'll find out sooner rather than later.
 *
 *	  Another reason for creating semaphores here is that the semaphore
 *	  implementation typically requires us to create semaphores in the
 *	  postmaster, not in backends.
 *
 * Note: this is NOT called by individual backends under a postmaster,
 * not even in the EXEC_BACKEND case.  The ProcGlobal and AuxiliaryProcs
 * pointers must be propagated specially for EXEC_BACKEND operation.
 */
void
InitProcGlobal(void)
{
	PGPROC	   *procs;
	PGXACT	   *pgxacts;
	int			i,
				j;
	bool		found;
	uint32		TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts;

	/* Create the ProcGlobal shared structure */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
	Assert(!found);

	/*
	 * Initialize the data structures.
	 */
	ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;
	ProcGlobal->freeProcs = NULL;
	ProcGlobal->autovacFreeProcs = NULL;
	ProcGlobal->bgworkerFreeProcs = NULL;
	ProcGlobal->startupProc = NULL;
	ProcGlobal->startupProcPid = 0;
	ProcGlobal->startupBufferPinWaitBufId = -1;
	ProcGlobal->walwriterLatch = NULL;
	ProcGlobal->checkpointerLatch = NULL;
	pg_atomic_init_u32(&ProcGlobal->nextClearXidElem, INVALID_PGPROCNO);

	/*
	 * Create and initialize all the PGPROC structures we'll need.  There are
	 * five separate consumers: (1) normal backends, (2) autovacuum workers
	 * and the autovacuum launcher, (3) background workers, (4) auxiliary
	 * processes, and (5) prepared transactions.  Each PGPROC structure is
	 * dedicated to exactly one of these purposes, and they do not move
	 * between groups.
	 */
	procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC));
	ProcGlobal->allProcs = procs;
	/* XXX allProcCount isn't really all of them; it excludes prepared xacts */
	ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS;
	if (!procs)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	MemSet(procs, 0, TotalProcs * sizeof(PGPROC));

	/*
	 * Also allocate a separate array of PGXACT structures.  This is separate
	 * from the main PGPROC array so that the most heavily accessed data is
	 * stored contiguously in memory in as few cache lines as possible. This
	 * provides significant performance benefits, especially on a
	 * multiprocessor system.  There is one PGXACT structure for every PGPROC
	 * structure.
	 */
	pgxacts = (PGXACT *) ShmemAlloc(TotalProcs * sizeof(PGXACT));
	MemSet(pgxacts, 0, TotalProcs * sizeof(PGXACT));
	ProcGlobal->allPgXact = pgxacts;

	for (i = 0; i < TotalProcs; i++)
	{
		/* Common initialization for all PGPROCs, regardless of type. */

		/*
		 * Set up per-PGPROC semaphore, latch, and backendLock. Prepared xact
		 * dummy PGPROCs don't need these though - they're never associated
		 * with a real process
		 */
		if (i < MaxBackends + NUM_AUXILIARY_PROCS)
		{
			PGSemaphoreCreate(&(procs[i].sem));
			InitSharedLatch(&(procs[i].procLatch));
			procs[i].backendLock = LWLockAssign();
		}
		procs[i].pgprocno = i;

		/*
		 * Newly created PGPROCs for normal backends, autovacuum and bgworkers
		 * must be queued up on the appropriate free list.  Because there can
		 * only ever be a small, fixed number of auxiliary processes, no free
		 * list is used in that case; InitAuxiliaryProcess() instead uses a
		 * linear search.   PGPROCs for prepared transactions are added to a
		 * free list by TwoPhaseShmemInit().
		 */
		if (i < MaxConnections)
		{
			/* PGPROC for normal backend, add to freeProcs list */
			procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs;
			ProcGlobal->freeProcs = &procs[i];
			procs[i].procgloballist = &ProcGlobal->freeProcs;
		}
		else if (i < MaxConnections + autovacuum_max_workers + 1)
		{
			/* PGPROC for AV launcher/worker, add to autovacFreeProcs list */
			procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs;
			ProcGlobal->autovacFreeProcs = &procs[i];
			procs[i].procgloballist = &ProcGlobal->autovacFreeProcs;
		}
		else if (i < MaxBackends)
		{
			/* PGPROC for bgworker, add to bgworkerFreeProcs list */
			procs[i].links.next = (SHM_QUEUE *) ProcGlobal->bgworkerFreeProcs;
			ProcGlobal->bgworkerFreeProcs = &procs[i];
			procs[i].procgloballist = &ProcGlobal->bgworkerFreeProcs;
		}

		/* Initialize myProcLocks[] shared memory queues. */
		for (j = 0; j < NUM_LOCK_PARTITIONS; j++)
			SHMQueueInit(&(procs[i].myProcLocks[j]));
	}

	/*
	 * Save pointers to the blocks of PGPROC structures reserved for auxiliary
	 * processes and prepared transactions.
	 */
	AuxiliaryProcs = &procs[MaxBackends];
	PreparedXactProcs = &procs[MaxBackends + NUM_AUXILIARY_PROCS];

	/* Create ProcStructLock spinlock, too */
	ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
	SpinLockInit(ProcStructLock);
}

/*
 * InitProcess -- initialize a per-process data structure for this backend
 */
void
InitProcess(void)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
	PGPROC * volatile * procgloballist;

	/*
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	if (procglobal == NULL)
		elog(PANIC, "proc header uninitialized");

	if (MyProc != NULL)
		elog(ERROR, "you already exist");

	/* Decide which list should supply our PGPROC. */
	if (IsAnyAutoVacuumProcess())
		procgloballist = &procglobal->autovacFreeProcs;
	else if (IsBackgroundWorker)
		procgloballist = &procglobal->bgworkerFreeProcs;
	else
		procgloballist = &procglobal->freeProcs;

	/*
	 * Try to get a proc struct from the appropriate free list.  If this
	 * fails, we must be out of PGPROC structures (not to mention semaphores).
	 *
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
	 */
	SpinLockAcquire(ProcStructLock);

	set_spins_per_delay(procglobal->spins_per_delay);

	MyProc = *procgloballist;

	if (MyProc != NULL)
	{
		*procgloballist = (PGPROC *) MyProc->links.next;
		SpinLockRelease(ProcStructLock);
	}
	else
	{
		/*
		 * If we reach here, all the PGPROCs are in use.  This is one of the
		 * possible places to detect "too many backends", so give the standard
		 * error message.  XXX do we need to give a different failure message
		 * in the autovacuum case?
		 */
		SpinLockRelease(ProcStructLock);
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("sorry, too many clients already")));
	}
	MyPgXact = &ProcGlobal->allPgXact[MyProc->pgprocno];

	/*
	 * Cross-check that the PGPROC is of the type we expect; if this were
	 * not the case, it would get returned to the wrong list.
	 */
	Assert(MyProc->procgloballist == procgloballist);

	/*
	 * Now that we have a PGPROC, mark ourselves as an active postmaster
	 * child; this is so that the postmaster can detect it if we exit without
	 * cleaning up.  (XXX autovac launcher currently doesn't participate in
	 * this; it probably should.)
	 */
	if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess())
		MarkPostmasterChildActive();

	/*
	 * Initialize all fields of MyProc, except for those previously
	 * initialized by InitProcGlobal.
	 */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->waitStatus = STATUS_OK;
	MyProc->lxid = InvalidLocalTransactionId;
	MyProc->fpVXIDLock = false;
	MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
	MyPgXact->xid = InvalidTransactionId;
	MyPgXact->xmin = InvalidTransactionId;
	MyProc->pid = MyProcPid;
	/* backendId, databaseId and roleId will be filled in later */
	MyProc->backendId = InvalidBackendId;
	MyProc->databaseId = InvalidOid;
	MyProc->roleId = InvalidOid;
	MyPgXact->delayChkpt = false;
	MyPgXact->vacuumFlags = 0;
	/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
	if (IsAutoVacuumWorkerProcess())
		MyPgXact->vacuumFlags |= PROC_IS_AUTOVACUUM;
	MyProc->lwWaiting = false;
	MyProc->lwWaitMode = 0;
	MyProc->waitLock = NULL;
	MyProc->waitProcLock = NULL;
#ifdef USE_ASSERT_CHECKING
	{
		int			i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif
	MyProc->recoveryConflictPending = false;

	/* Initialize fields for sync rep */
	MyProc->waitLSN = 0;
	MyProc->syncRepState = SYNC_REP_NOT_WAITING;
	SHMQueueElemInit(&(MyProc->syncRepLinks));

	/* Initialize fields for group XID clearing. */
	MyProc->backendLatestXid = InvalidTransactionId;
	pg_atomic_init_u32(&MyProc->nextClearXidElem, INVALID_PGPROCNO);

	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
	 * on it.  That allows us to repoint the process latch, which so far
	 * points to process local one, to the shared one.
	 */
	OwnLatch(&MyProc->procLatch);
	SwitchToSharedLatch();

	/*
	 * We might be reusing a semaphore that belonged to a failed process. So
	 * be careful and reinitialize its value here.  (This is not strictly
	 * necessary anymore, but seems like a good idea for cleanliness.)
	 */
	PGSemaphoreReset(&MyProc->sem);

	/*
	 * Arrange to clean up at backend exit.
	 */
	on_shmem_exit(ProcKill, 0);

	/*
	 * Now that we have a PGPROC, we could try to acquire locks, so initialize
	 * local state needed for LWLocks, and the deadlock checker.
	 */
	InitLWLockAccess();
	InitDeadLockChecking();
}

/*
 * InitProcessPhase2 -- make MyProc visible in the shared ProcArray.
 *
 * This is separate from InitProcess because we can't acquire LWLocks until
 * we've created a PGPROC, but in the EXEC_BACKEND case ProcArrayAdd won't
 * work until after we've done CreateSharedMemoryAndSemaphores.
 */
void
InitProcessPhase2(void)
{
	Assert(MyProc != NULL);

	/*
	 * Add our PGPROC to the PGPROC array in shared memory.
	 */
	ProcArrayAdd(MyProc);

	/*
	 * Arrange to clean that up at backend exit.
	 */
	on_shmem_exit(RemoveProcFromArray, 0);
}

/*
 * InitAuxiliaryProcess -- create a per-auxiliary-process data structure
 *
 * This is called by bgwriter and similar processes so that they will have a
 * MyProc value that's real enough to let them wait for LWLocks.  The PGPROC
 * and sema that are assigned are one of the extra ones created during
 * InitProcGlobal.
 *
 * Auxiliary processes are presently not expected to wait for real (lockmgr)
 * locks, so we need not set up the deadlock checker.  They are never added
 * to the ProcArray or the sinval messaging mechanism, either.  They also
 * don't get a VXID assigned, since this is only useful when we actually
 * hold lockmgr locks.
 *
 * Startup process however uses locks but never waits for them in the
 * normal backend sense. Startup process also takes part in sinval messaging
 * as a sendOnly process, so never reads messages from sinval queue. So
 * Startup process does have a VXID and does show up in pg_locks.
 */
void
InitAuxiliaryProcess(void)
{
	PGPROC	   *auxproc;
	int			proctype;

	/*
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	if (ProcGlobal == NULL || AuxiliaryProcs == NULL)
		elog(PANIC, "proc header uninitialized");

	if (MyProc != NULL)
		elog(ERROR, "you already exist");

	/*
	 * We use the ProcStructLock to protect assignment and releasing of
	 * AuxiliaryProcs entries.
	 *
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
	 */
	SpinLockAcquire(ProcStructLock);

	set_spins_per_delay(ProcGlobal->spins_per_delay);

	/*
	 * Find a free auxproc ... *big* trouble if there isn't one ...
	 */
	for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++)
	{
		auxproc = &AuxiliaryProcs[proctype];
		if (auxproc->pid == 0)
			break;
	}
	if (proctype >= NUM_AUXILIARY_PROCS)
	{
		SpinLockRelease(ProcStructLock);
		elog(FATAL, "all AuxiliaryProcs are in use");
	}

	/* Mark auxiliary proc as in use by me */
	/* use volatile pointer to prevent code rearrangement */
	((volatile PGPROC *) auxproc)->pid = MyProcPid;

	MyProc = auxproc;
	MyPgXact = &ProcGlobal->allPgXact[auxproc->pgprocno];

	SpinLockRelease(ProcStructLock);

	/*
	 * Initialize all fields of MyProc, except for those previously
	 * initialized by InitProcGlobal.
	 */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->waitStatus = STATUS_OK;
	MyProc->lxid = InvalidLocalTransactionId;
	MyProc->fpVXIDLock = false;
	MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
	MyPgXact->xid = InvalidTransactionId;
	MyPgXact->xmin = InvalidTransactionId;
	MyProc->backendId = InvalidBackendId;
	MyProc->databaseId = InvalidOid;
	MyProc->roleId = InvalidOid;
	MyPgXact->delayChkpt = false;
	MyPgXact->vacuumFlags = 0;
	MyProc->lwWaiting = false;
	MyProc->lwWaitMode = 0;
	MyProc->waitLock = NULL;
	MyProc->waitProcLock = NULL;
#ifdef USE_ASSERT_CHECKING
	{
		int			i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif

	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
	 * on it.  That allows us to repoint the process latch, which so far
	 * points to process local one, to the shared one.
	 */
	OwnLatch(&MyProc->procLatch);
	SwitchToSharedLatch();

	/*
	 * We might be reusing a semaphore that belonged to a failed process. So
	 * be careful and reinitialize its value here.  (This is not strictly
	 * necessary anymore, but seems like a good idea for cleanliness.)
	 */
	PGSemaphoreReset(&MyProc->sem);

	/*
	 * Arrange to clean up at process exit.
	 */
	on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype));
}

/*
 * Record the PID and PGPROC structures for the Startup process, for use in
 * ProcSendSignal().  See comments there for further explanation.
 */
void
PublishStartupProcessInformation(void)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

	SpinLockAcquire(ProcStructLock);

	procglobal->startupProc = MyProc;
	procglobal->startupProcPid = MyProcPid;

	SpinLockRelease(ProcStructLock);
}

/*
 * Used from bufmgr to share the value of the buffer that Startup waits on,
 * or to reset the value to "not waiting" (-1). This allows processing
 * of recovery conflicts for buffer pins. Set is made before backends look
 * at this value, so locking not required, especially since the set is
 * an atomic integer set operation.
 */
void
SetStartupBufferPinWaitBufId(int bufid)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

	procglobal->startupBufferPinWaitBufId = bufid;
}

/*
 * Used by backends when they receive a request to check for buffer pin waits.
 */
int
GetStartupBufferPinWaitBufId(void)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

	return procglobal->startupBufferPinWaitBufId;
}

/*
 * Check whether there are at least N free PGPROC objects.
 *
 * Note: this is designed on the assumption that N will generally be small.
 */
bool
HaveNFreeProcs(int n)
{
	PGPROC	   *proc;

	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

	SpinLockAcquire(ProcStructLock);

	proc = procglobal->freeProcs;

	while (n > 0 && proc != NULL)
	{
		proc = (PGPROC *) proc->links.next;
		n--;
	}

	SpinLockRelease(ProcStructLock);

	return (n <= 0);
}
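
/*
 * Illustrative use (simplified sketch of the check in postinit.c): the
 * reserved-connection test during backend startup looks roughly like
 *
 *		if (!am_superuser &&
 *			ReservedBackends > 0 &&
 *			!HaveNFreeProcs(ReservedBackends))
 *			ereport(FATAL,
 *					(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
 *					 errmsg("remaining connection slots are reserved ...")));
 */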

/*
 * Check if the current process is awaiting a lock.
 */
bool
IsWaitingForLock(void)
{
	if (lockAwaited == NULL)
		return false;

	return true;
}

/*
 * Cancel any pending wait for lock, when aborting a transaction, and revert
 * any strong lock count acquisition for a lock being acquired.
 *
 * (Normally, this would only happen if we accept a cancel/die
 * interrupt while waiting; but an ereport(ERROR) before or during the lock
 * wait is within the realm of possibility, too.)
 */
void
LockErrorCleanup(void)
{
	LWLock	   *partitionLock;
	DisableTimeoutParams timeouts[2];

	HOLD_INTERRUPTS();

	AbortStrongLockAcquire();

	/* Nothing to do if we weren't waiting for a lock */
	if (lockAwaited == NULL)
	{
		RESUME_INTERRUPTS();
		return;
	}

	/*
	 * Turn off the deadlock and lock timeout timers, if they are still
	 * running (see ProcSleep).  Note we must preserve the LOCK_TIMEOUT
	 * indicator flag, since this function is executed before
	 * ProcessInterrupts when responding to SIGINT; else we'd lose the
	 * knowledge that the SIGINT came from a lock timeout and not an external
	 * source.
	 */
	timeouts[0].id = DEADLOCK_TIMEOUT;
	timeouts[0].keep_indicator = false;
	timeouts[1].id = LOCK_TIMEOUT;
	timeouts[1].keep_indicator = true;
	disable_timeouts(timeouts, 2);

	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
	partitionLock = LockHashPartitionLock(lockAwaited->hashcode);
	LWLockAcquire(partitionLock, LW_EXCLUSIVE);

	if (MyProc->links.next != NULL)
	{
		/* We could not have been granted the lock yet */
		RemoveFromWaitQueue(MyProc, lockAwaited->hashcode);
	}
	else
	{
		/*
		 * Somebody kicked us off the lock queue already.  Perhaps they
		 * granted us the lock, or perhaps they detected a deadlock. If they
		 * did grant us the lock, we'd better remember it in our local lock
		 * table.
		 */
		if (MyProc->waitStatus == STATUS_OK)
			GrantAwaitedLock();
	}

	lockAwaited = NULL;

	LWLockRelease(partitionLock);

	RESUME_INTERRUPTS();
}


/*
 * ProcReleaseLocks() -- release locks associated with current transaction
 *			at main transaction commit or abort
 *
 * At main transaction commit, we release standard locks except session locks.
 * At main transaction abort, we release all locks including session locks.
 *
 * Advisory locks are released only if they are transaction-level;
 * session-level holds remain, whether this is a commit or not.
 *
 * At subtransaction commit, we don't release any locks (so this func is not
 * needed at all); we will defer the releasing to the parent transaction.
 * At subtransaction abort, we release all locks held by the subtransaction;
 * this is implemented by retail releasing of the locks under control of
 * the ResourceOwner mechanism.
 */
void
ProcReleaseLocks(bool isCommit)
{
	if (!MyProc)
		return;
	/* If waiting, get off wait queue (should only be needed after error) */
	LockErrorCleanup();
	/* Release standard locks, including session-level if aborting */
	LockReleaseAll(DEFAULT_LOCKMETHOD, !isCommit);
	/* Release transaction-level advisory locks */
	LockReleaseAll(USER_LOCKMETHOD, false);
}


/*
 * RemoveProcFromArray() -- Remove this process from the shared ProcArray.
 */
static void
RemoveProcFromArray(int code, Datum arg)
{
	Assert(MyProc != NULL);
	ProcArrayRemove(MyProc, InvalidTransactionId);
}

/*
 * ProcKill() -- Destroy the per-proc data structure for
 *		this process. Release any of its held LW locks.
 */
static void
ProcKill(int code, Datum arg)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
	PGPROC	   *proc;
	PGPROC * volatile * procgloballist;

	Assert(MyProc != NULL);

	/* Make sure we're out of the sync rep lists */
	SyncRepCleanupAtProcExit();

#ifdef USE_ASSERT_CHECKING
	{
		int			i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif

	/*
	 * Release any LW locks I am holding.  There really shouldn't be any, but
	 * it's cheap to check again before we cut the knees off the LWLock
	 * facility by releasing our PGPROC ...
	 */
	LWLockReleaseAll();

	/* Make sure active replication slots are released */
	if (MyReplicationSlot != NULL)
		ReplicationSlotRelease();

	/*
	 * Reset MyLatch to the process local one.  This is so that signal
	 * handlers et al can continue using the latch after the shared latch
	 * isn't ours anymore. After that clear MyProc and disown the shared
	 * latch.
	 */
	SwitchBackToLocalLatch();
	proc = MyProc;
	MyProc = NULL;
	DisownLatch(&proc->procLatch);

	procgloballist = proc->procgloballist;
	SpinLockAcquire(ProcStructLock);

	/* Return PGPROC structure (and semaphore) to appropriate freelist */
	proc->links.next = (SHM_QUEUE *) *procgloballist;
	*procgloballist = proc;

	/* Update shared estimate of spins_per_delay */
	procglobal->spins_per_delay = update_spins_per_delay(procglobal->spins_per_delay);

	SpinLockRelease(ProcStructLock);

	/*
	 * This process is no longer present in shared memory in any meaningful
	 * way, so tell the postmaster we've cleaned up acceptably well. (XXX
	 * autovac launcher should be included here someday)
	 */
	if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess())
		MarkPostmasterChildInactive();

	/* wake autovac launcher if needed -- see comments in FreeWorkerInfo */
	if (AutovacuumLauncherPid != 0)
		kill(AutovacuumLauncherPid, SIGUSR2);
}

/*
 * AuxiliaryProcKill() -- Cut-down version of ProcKill for auxiliary
 *		processes (bgwriter, etc).  The PGPROC and sema are not released, only
 *		marked as not-in-use.
 */
static void
AuxiliaryProcKill(int code, Datum arg)
{
	int			proctype = DatumGetInt32(arg);
	PGPROC	   *auxproc PG_USED_FOR_ASSERTS_ONLY;
	PGPROC	   *proc;

	Assert(proctype >= 0 && proctype < NUM_AUXILIARY_PROCS);

	auxproc = &AuxiliaryProcs[proctype];

	Assert(MyProc == auxproc);

	/* Release any LW locks I am holding (see notes above) */
	LWLockReleaseAll();

	/*
	 * Reset MyLatch to the process local one.  This is so that signal
	 * handlers et al can continue using the latch after the shared latch
	 * isn't ours anymore. After that clear MyProc and disown the shared
	 * latch.
	 */
	SwitchBackToLocalLatch();
	proc = MyProc;
	MyProc = NULL;
	DisownLatch(&proc->procLatch);

	SpinLockAcquire(ProcStructLock);

	/* Mark auxiliary proc no longer in use */
	proc->pid = 0;

	/* Update shared estimate of spins_per_delay */
	ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);

	SpinLockRelease(ProcStructLock);
}


/*
 * ProcQueue package: routines for putting processes to sleep
 *		and  waking them up
 */

/*
 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
 *
 * Returns: a pointer to the queue
 * Side Effects: Initializes the queue if it wasn't there before
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(const char *name)
{
	PROC_QUEUE *queue;
	bool		found;

	queue = (PROC_QUEUE *)
		ShmemInitStruct(name, sizeof(PROC_QUEUE), &found);

	if (!found)
		ProcQueueInit(queue);

	return queue;
}
#endif

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
ProcQueueInit(PROC_QUEUE *queue)
{
	SHMQueueInit(&(queue->links));
	queue->size = 0;
}


/*
 * ProcSleep -- put a process to sleep on the specified lock
 *
 * Caller must have set MyProc->heldLocks to reflect locks already held
 * on the lockable object by this process (under all XIDs).
 *
 * The lock table's partition lock must be held at entry, and will be held
 * at exit.
 *
 * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
 *
 * ASSUME: that no one will fiddle with the queue until after
 *		we release the partition lock.
 *
 * NOTES: The process queue is now a priority queue for locking.
 */
int
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
{
	LOCKMODE	lockmode = locallock->tag.mode;
	LOCK	   *lock = locallock->lock;
	PROCLOCK   *proclock = locallock->proclock;
	uint32		hashcode = locallock->hashcode;
	LWLock	   *partitionLock = LockHashPartitionLock(hashcode);
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	LOCKMASK	myHeldLocks = MyProc->heldLocks;
	bool		early_deadlock = false;
	bool		allow_autovacuum_cancel = true;
	int			myWaitStatus;
	PGPROC	   *proc;
	int			i;

	/*
	 * Determine where to add myself in the wait queue.
	 *
	 * Normally I should go at the end of the queue.  However, if I already
	 * hold locks that conflict with the request of any previous waiter, put
	 * myself in the queue just in front of the first such waiter. This is not
	 * a necessary step, since deadlock detection would move me to before that
	 * waiter anyway; but it's relatively cheap to detect such a conflict
	 * immediately, and avoid delaying till deadlock timeout.
	 *
	 * Special case: if I find I should go in front of some waiter, check to
	 * see if I conflict with already-held locks or the requests before that
	 * waiter.  If not, then just grant myself the requested lock immediately.
	 * This is the same as the test for immediate grant in LockAcquire, except
	 * we are only considering the part of the wait queue before my insertion
	 * point.
	 */
	if (myHeldLocks != 0)
	{
		LOCKMASK	aheadRequests = 0;

		proc = (PGPROC *) waitQueue->links.next;
		for (i = 0; i < waitQueue->size; i++)
		{
			/* Must he wait for me? */
			if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
			{
				/* Must I wait for him ? */
				if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks)
				{
					/*
					 * Yes, so we have a deadlock.  Easiest way to clean up
					 * correctly is to call RemoveFromWaitQueue(), but we
					 * can't do that until we are *on* the wait queue. So, set
					 * a flag to check below, and break out of loop.  Also,
					 * record deadlock info for later message.
					 */
					RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
					early_deadlock = true;
					break;
				}
				/* I must go before this waiter.  Check special case. */
				if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
					LockCheckConflicts(lockMethodTable,
									   lockmode,
									   lock,
									   proclock) == STATUS_OK)
				{
					/* Skip the wait and just grant myself the lock. */
					GrantLock(lock, proclock, lockmode);
					GrantAwaitedLock();
					return STATUS_OK;
				}
				/* Break out of loop to put myself before him */
				break;
			}
			/* Nope, so advance to next waiter */
			aheadRequests |= LOCKBIT_ON(proc->waitLockMode);
			proc = (PGPROC *) proc->links.next;
		}

		/*
		 * If we fall out of loop normally, proc points to waitQueue head, so
		 * we will insert at tail of queue as desired.
		 */
	}
	else
	{
		/* I hold no locks, so I can't push in front of anyone. */
		proc = (PGPROC *) &(waitQueue->links);
	}

	/*
	 * Insert self into queue, ahead of the given proc (or at tail of queue).
	 */
	SHMQueueInsertBefore(&(proc->links), &(MyProc->links));
	waitQueue->size++;

	lock->waitMask |= LOCKBIT_ON(lockmode);

	/* Set up wait information in PGPROC object, too */
	MyProc->waitLock = lock;
	MyProc->waitProcLock = proclock;
	MyProc->waitLockMode = lockmode;

	MyProc->waitStatus = STATUS_WAITING;

	/*
	 * If we detected deadlock, give up without waiting.  This must agree with
	 * CheckDeadLock's recovery code, except that we shouldn't release the
	 * semaphore since we haven't tried to lock it yet.
	 */
	if (early_deadlock)
	{
		RemoveFromWaitQueue(MyProc, hashcode);
		return STATUS_ERROR;
	}

	/* mark that we are waiting for a lock */
	lockAwaited = locallock;

	/*
	 * Release the lock table's partition lock.
	 *
	 * NOTE: this may also cause us to exit critical-section state, possibly
	 * allowing a cancel/die interrupt to be accepted. This is OK because we
	 * have recorded the fact that we are waiting for a lock, and so
	 * LockErrorCleanup will clean up if cancel/die happens.
	 */
	LWLockRelease(partitionLock);

	/*
	 * Also, now that we will successfully clean up after an ereport, it's
	 * safe to check to see if there's a buffer pin deadlock against the
	 * Startup process.  Of course, that's only necessary if we're doing Hot
	 * Standby and are not the Startup process ourselves.
	 */
	if (RecoveryInProgress() && !InRecovery)
		CheckRecoveryConflictDeadlock();

	/* Reset deadlock_state before enabling the timeout handler */
	deadlock_state = DS_NOT_YET_CHECKED;
	got_deadlock_timeout = false;

	/*
	 * Set timer so we can wake up after awhile and check for a deadlock. If a
	 * deadlock is detected, the handler releases the process's semaphore and
	 * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we
	 * must report failure rather than success.
	 *
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
	 *
	 * If LockTimeout is set, also enable the timeout for that.  We can save a
	 * few cycles by enabling both timeout sources in one call.
	 */
	if (LockTimeout > 0)
	{
		EnableTimeoutParams timeouts[2];

		timeouts[0].id = DEADLOCK_TIMEOUT;
		timeouts[0].type = TMPARAM_AFTER;
		timeouts[0].delay_ms = DeadlockTimeout;
		timeouts[1].id = LOCK_TIMEOUT;
		timeouts[1].type = TMPARAM_AFTER;
		timeouts[1].delay_ms = LockTimeout;
		enable_timeouts(timeouts, 2);
	}
	else
		enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);

	/*
	 * If somebody wakes us between LWLockRelease and WaitLatch, the latch
	 * will not wait. But a set latch does not necessarily mean that the lock
	 * is free now, as there are many other sources for latch sets than
	 * somebody releasing the lock.
	 *
	 * We process interrupts whenever the latch has been set, so cancel/die
	 * interrupts are processed quickly. This means we must not mind losing
	 * control to a cancel/die interrupt here.  We don't, because we have no
	 * shared-state-change work to do after being granted the lock (the
	 * grantor did it all).  We do have to worry about canceling the deadlock
	 * timeout and updating the locallock table, but if we lose control to an
	 * error, LockErrorCleanup will fix that up.
	 */
	do
	{
		WaitLatch(MyLatch, WL_LATCH_SET, 0);
		ResetLatch(MyLatch);
		/* check for deadlocks first, as that's probably log-worthy */
		if (got_deadlock_timeout)
		{
			CheckDeadLock();
			got_deadlock_timeout = false;
		}
		CHECK_FOR_INTERRUPTS();

		/*
		 * waitStatus could change from STATUS_WAITING to something else
		 * asynchronously.  Read it just once per loop to prevent surprising
		 * behavior (such as missing log messages).
		 */
		myWaitStatus = *((volatile int *) &MyProc->waitStatus);

		/*
		 * If we are not deadlocked, but are waiting on an autovacuum-induced
		 * task, send a signal to interrupt it.
		 */
		if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel)
		{
			PGPROC	   *autovac = GetBlockingAutoVacuumPgproc();
			PGXACT	   *autovac_pgxact = &ProcGlobal->allPgXact[autovac->pgprocno];

			LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

			/*
			 * Only do it if the worker is not working to protect against Xid
			 * wraparound.
			 */
			if ((autovac_pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
				!(autovac_pgxact->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND))
			{
				int			pid = autovac->pid;
				StringInfoData locktagbuf;
				StringInfoData logbuf;	/* errdetail for server log */

				initStringInfo(&locktagbuf);
				initStringInfo(&logbuf);
				DescribeLockTag(&locktagbuf, &lock->tag);
				appendStringInfo(&logbuf,
								 _("Process %d waits for %s on %s."),
								 MyProcPid,
							  GetLockmodeName(lock->tag.locktag_lockmethodid,
											  lockmode),
								 locktagbuf.data);

				/* release lock as quickly as possible */
				LWLockRelease(ProcArrayLock);

				/* send the autovacuum worker Back to Old Kent Road */
				ereport(DEBUG1,
					  (errmsg("sending cancel to blocking autovacuum PID %d",
							  pid),
					   errdetail_log("%s", logbuf.data)));

				if (kill(pid, SIGINT) < 0)
				{
					/*
					 * There's a race condition here: once we release the
					 * ProcArrayLock, it's possible for the autovac worker to
					 * close up shop and exit before we can do the kill().
					 * Therefore, we do not whinge about no-such-process.
					 * Other errors such as EPERM could conceivably happen if
					 * the kernel recycles the PID fast enough, but such cases
					 * seem improbable enough that it's probably best to issue
					 * a warning if we see some other errno.
					 */
					if (errno != ESRCH)
						ereport(WARNING,
						   (errmsg("could not send signal to process %d: %m",
								   pid)));
				}

				pfree(logbuf.data);
				pfree(locktagbuf.data);
			}
			else
				LWLockRelease(ProcArrayLock);

			/* prevent signal from being resent more than once */
			allow_autovacuum_cancel = false;
		}

		/*
		 * If awoken after the deadlock check interrupt has run, and
		 * log_lock_waits is on, then report about the wait.
		 */
		if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED)
		{
			StringInfoData buf,
						lock_waiters_sbuf,
						lock_holders_sbuf;
			const char *modename;
			long		secs;
			int			usecs;
			long		msecs;
			SHM_QUEUE  *procLocks;
			PROCLOCK   *proclock;
			bool		first_holder = true,
						first_waiter = true;
			int			lockHoldersNum = 0;

			initStringInfo(&buf);
			initStringInfo(&lock_waiters_sbuf);
			initStringInfo(&lock_holders_sbuf);

			DescribeLockTag(&buf, &locallock->tag.lock);
			modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
									   lockmode);
			TimestampDifference(get_timeout_start_time(DEADLOCK_TIMEOUT),
								GetCurrentTimestamp(),
								&secs, &usecs);
			msecs = secs * 1000 + usecs / 1000;
			usecs = usecs % 1000;

			/*
			 * we loop over the lock's procLocks to gather a list of all
			 * holders and waiters. Thus we will be able to provide more
			 * detailed information for lock debugging purposes.
			 *
			 * lock->procLocks contains all processes which hold or wait for
			 * this lock.
			 */

			LWLockAcquire(partitionLock, LW_SHARED);

			procLocks = &(lock->procLocks);
			proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
											   offsetof(PROCLOCK, lockLink));

			while (proclock)
			{
				/*
				 * we are a waiter if myProc->waitProcLock == proclock; we are
				 * a holder if it is NULL or something different
				 */
				if (proclock->tag.myProc->waitProcLock == proclock)
				{
					if (first_waiter)
					{
						appendStringInfo(&lock_waiters_sbuf, "%d",
										 proclock->tag.myProc->pid);
						first_waiter = false;
					}
					else
						appendStringInfo(&lock_waiters_sbuf, ", %d",
										 proclock->tag.myProc->pid);
				}
				else
				{
					if (first_holder)
					{
						appendStringInfo(&lock_holders_sbuf, "%d",
										 proclock->tag.myProc->pid);
						first_holder = false;
					}
					else
						appendStringInfo(&lock_holders_sbuf, ", %d",
										 proclock->tag.myProc->pid);

					lockHoldersNum++;
				}

				proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink,
											   offsetof(PROCLOCK, lockLink));
			}

			LWLockRelease(partitionLock);

			if (deadlock_state == DS_SOFT_DEADLOCK)
				ereport(LOG,
						(errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs),
						 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
						   "Processes holding the lock: %s. Wait queue: %s.",
											   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
			else if (deadlock_state == DS_HARD_DEADLOCK)
			{
				/*
				 * This message is a bit redundant with the error that will be
				 * reported subsequently, but in some cases the error report
				 * might not make it to the log (eg, if it's caught by an
				 * exception handler), and we want to ensure all long-wait
				 * events get logged.
				 */
				ereport(LOG,
						(errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs),
						 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
						   "Processes holding the lock: %s. Wait queue: %s.",
											   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
			}

			if (myWaitStatus == STATUS_WAITING)
				ereport(LOG,
						(errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs),
						 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
						   "Processes holding the lock: %s. Wait queue: %s.",
											   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
			else if (myWaitStatus == STATUS_OK)
				ereport(LOG,
					(errmsg("process %d acquired %s on %s after %ld.%03d ms",
							MyProcPid, modename, buf.data, msecs, usecs)));
			else
			{
				Assert(myWaitStatus == STATUS_ERROR);

				/*
				 * Currently, the deadlock checker always kicks its own
				 * process, which means that we'll only see STATUS_ERROR when
				 * deadlock_state == DS_HARD_DEADLOCK, and there's no need to
				 * print redundant messages.  But for completeness and
				 * future-proofing, print a message if it looks like someone
				 * else kicked us off the lock.
				 */
				if (deadlock_state != DS_HARD_DEADLOCK)
					ereport(LOG,
							(errmsg("process %d failed to acquire %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs),
							 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
						   "Processes holding the lock: %s. Wait queue: %s.",
												   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
			}

			/*
			 * At this point we might still need to wait for the lock. Reset
			 * state so we don't print the above messages again.
			 */
			deadlock_state = DS_NO_DEADLOCK;

			pfree(buf.data);
			pfree(lock_holders_sbuf.data);
			pfree(lock_waiters_sbuf.data);
		}
	} while (myWaitStatus == STATUS_WAITING);

	/*
	 * Disable the timers, if they are still running.  As in LockErrorCleanup,
	 * we must preserve the LOCK_TIMEOUT indicator flag: if a lock timeout has
	 * already caused QueryCancelPending to become set, we want the cancel to
	 * be reported as a lock timeout, not a user cancel.
	 */
	if (LockTimeout > 0)
	{
		DisableTimeoutParams timeouts[2];

		timeouts[0].id = DEADLOCK_TIMEOUT;
		timeouts[0].keep_indicator = false;
		timeouts[1].id = LOCK_TIMEOUT;
		timeouts[1].keep_indicator = true;
		disable_timeouts(timeouts, 2);
	}
	else
		disable_timeout(DEADLOCK_TIMEOUT, false);

	/*
	 * Re-acquire the lock table's partition lock.  We have to do this to hold
	 * off cancel/die interrupts before we can mess with lockAwaited (else we
	 * might have a missed or duplicated locallock update).
	 */
	LWLockAcquire(partitionLock, LW_EXCLUSIVE);

	/*
	 * We no longer want LockErrorCleanup to do anything.
	 */
	lockAwaited = NULL;

	/*
	 * If we got the lock, be sure to remember it in the locallock table.
	 */
	if (MyProc->waitStatus == STATUS_OK)
		GrantAwaitedLock();

	/*
	 * We don't have to do anything else, because the awaker did all the
	 * necessary update of the lock table and MyProc.
	 */
	return MyProc->waitStatus;
}


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
 *	 Also remove the process from the wait queue and set its links invalid.
 *	 RETURN: the next process in the wait queue.
 *
 * The appropriate lock partition lock must be held by caller.
 *
 * XXX: presently, this code is only used for the "success" case, and only
 * works correctly for that case.  To clean up in failure case, would need
 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
 * Hence, in practice the waitStatus parameter must be STATUS_OK.
 */
PGPROC *
ProcWakeup(PGPROC *proc, int waitStatus)
{
	PGPROC	   *retProc;

	/* Proc should be sleeping ... */
	if (proc->links.prev == NULL ||
		proc->links.next == NULL)
		return NULL;
	Assert(proc->waitStatus == STATUS_WAITING);

	/* Save next process before we zap the list link */
	retProc = (PGPROC *) proc->links.next;

	/* Remove process from wait queue */
	SHMQueueDelete(&(proc->links));
	(proc->waitLock->waitProcs.size)--;

	/* Clean up process' state and pass it the ok/fail signal */
	proc->waitLock = NULL;
	proc->waitProcLock = NULL;
	proc->waitStatus = waitStatus;

	/* And awaken it */
	SetLatch(&proc->procLatch);

	return retProc;
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
 *		released (or a prior waiter is aborted).  Scan all waiters
 *		for lock, waken any that are no longer blocked.
 *
 * The appropriate lock partition lock must be held by caller.
 */
void
ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
{
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
	PGPROC	   *proc;
	LOCKMASK	aheadRequests = 0;

	Assert(queue_size >= 0);

	if (queue_size == 0)
		return;

	proc = (PGPROC *) waitQueue->links.next;

	while (queue_size-- > 0)
	{
		LOCKMODE	lockmode = proc->waitLockMode;

		/*
		 * Waken if (a) doesn't conflict with requests of earlier waiters, and
		 * (b) doesn't conflict with already-held locks.
		 */
		if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
			LockCheckConflicts(lockMethodTable,
							   lockmode,
							   lock,
							   proc->waitProcLock) == STATUS_OK)
		{
			/* OK to waken */
			GrantLock(lock, proc->waitProcLock, lockmode);
			proc = ProcWakeup(proc, STATUS_OK);

			/*
			 * ProcWakeup removes proc from the lock's waiting process queue
			 * and returns the next proc in chain; don't use proc's next-link,
			 * because it's been cleared.
			 */
		}
		else
		{
			/*
			 * Cannot wake this guy. Remember his request for later checks.
			 */
			aheadRequests |= LOCKBIT_ON(lockmode);
			proc = (PGPROC *) proc->links.next;
		}
	}

	Assert(waitQueue->size >= 0);
}

/*
 * CheckDeadLock
 *
 * We only get to this routine, if DEADLOCK_TIMEOUT fired while waiting for a
 * lock to be released by some other process.  Check if there's a deadlock; if
 * not, just return.  (But signal ProcSleep to log a message, if
 * log_lock_waits is true.)  If we have a real deadlock, remove ourselves from
 * the lock's wait queue and signal an error to ProcSleep.
 */
static void
CheckDeadLock(void)
{
	int			i;

	/*
	 * Acquire exclusive lock on the entire shared lock data structures. Must
	 * grab LWLocks in partition-number order to avoid LWLock deadlock.
	 *
	 * Note that the deadlock check interrupt had better not be enabled
	 * anywhere that this process itself holds lock partition locks, else this
	 * will wait forever.  Also note that LWLockAcquire creates a critical
	 * section, so that this routine cannot be interrupted by cancel/die
	 * interrupts.
	 */
	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
		LWLockAcquire(LockHashPartitionLockByIndex(i), LW_EXCLUSIVE);

	/*
	 * Check to see if we've been awoken by anyone in the interim.
	 *
	 * If we have, we can return and resume our transaction -- happy day.
	 * Before we are awoken the process releasing the lock grants it to us so
	 * we know that we don't have to wait anymore.
	 *
	 * We check by looking to see if we've been unlinked from the wait queue.
	 * This is quicker than checking our semaphore's state, since no kernel
	 * call is needed, and it is safe because we hold the lock partition lock.
	 */
	if (MyProc->links.prev == NULL ||
		MyProc->links.next == NULL)
		goto check_done;

#ifdef LOCK_DEBUG
	if (Debug_deadlocks)
		DumpAllLocks();
#endif

	/* Run the deadlock check, and set deadlock_state for use by ProcSleep */
	deadlock_state = DeadLockCheck(MyProc);

	if (deadlock_state == DS_HARD_DEADLOCK)
	{
		/*
		 * Oops.  We have a deadlock.
		 *
		 * Get this process out of wait state. (Note: we could do this more
		 * efficiently by relying on lockAwaited, but use this coding to
		 * preserve the flexibility to kill some other transaction than the
		 * one detecting the deadlock.)
		 *
		 * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
		 * ProcSleep will report an error after we return from the signal
		 * handler.
		 */
		Assert(MyProc->waitLock != NULL);
		RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));

		/*
		 * We're done here.  Transaction abort caused by the error that
		 * ProcSleep will raise will cause any other locks we hold to be
		 * released, thus allowing other processes to wake up; we don't need
		 * to do that here.  NOTE: an exception is that releasing locks we
		 * hold doesn't consider the possibility of waiters that were blocked
		 * behind us on the lock we just failed to get, and might now be
		 * wakable because we're not in front of them anymore.  However,
		 * RemoveFromWaitQueue took care of waking up any such processes.
		 */
	}

	/*
	 * And release locks.  We do this in reverse order for two reasons: (1)
	 * Anyone else who needs more than one of the locks will be trying to lock
	 * them in increasing order; we don't want to release the other process
	 * until it can get all the locks it needs. (2) This avoids O(N^2)
	 * behavior inside LWLockRelease.
	 */
check_done:
	for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
		LWLockRelease(LockHashPartitionLockByIndex(i));
}

/*
 * CheckDeadLockAlert - Handle the expiry of deadlock_timeout.
 *
 * NB: Runs inside a signal handler, be careful.
 */
void
CheckDeadLockAlert(void)
{
	int			save_errno = errno;

	got_deadlock_timeout = true;

	/*
	 * Have to set the latch again, even if handle_sig_alarm already did. Back
	 * then got_deadlock_timeout wasn't yet set... It's unlikely that this
	 * ever would be a problem, but setting a set latch again is cheap.
	 */
	SetLatch(MyLatch);
	errno = save_errno;
}

/*
 * ProcWaitForSignal - wait for a signal from another backend.
 *
 * As this uses the generic process latch the caller has to be robust against
 * unrelated wakeups: Always check that the desired state has occurred, and
 * wait again if not.
 */
void
ProcWaitForSignal(void)
{
	WaitLatch(MyLatch, WL_LATCH_SET, 0);
	ResetLatch(MyLatch);
	CHECK_FOR_INTERRUPTS();
}
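
/*
 * Typical usage pattern for ProcWaitForSignal(), per the note above (a
 * simplified sketch; "state_we_want" stands in for whatever shared-state
 * test the caller actually needs):
 *
 *		for (;;)
 *		{
 *			if (state_we_want)
 *				break;
 *			ProcWaitForSignal();
 *		}
 */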

/*
 * ProcSendSignal - send a signal to a backend identified by PID
 */
void
ProcSendSignal(int pid)
{
	PGPROC	   *proc = NULL;

	if (RecoveryInProgress())
	{
		/* use volatile pointer to prevent code rearrangement */
		volatile PROC_HDR *procglobal = ProcGlobal;

		SpinLockAcquire(ProcStructLock);

		/*
		 * Check to see whether it is the Startup process we wish to signal.
		 * This call is made by the buffer manager when it wishes to wake up a
		 * process that has been waiting for a pin so that it can obtain a
		 * cleanup lock using LockBufferForCleanup(). Startup is not a normal
		 * backend, so BackendPidGetProc() will not return any pid at all. So
		 * we remember the information for this special case.
		 */
		if (pid == procglobal->startupProcPid)
			proc = procglobal->startupProc;

		SpinLockRelease(ProcStructLock);
	}

	if (proc == NULL)
		proc = BackendPidGetProc(pid);

	if (proc != NULL)
	{
		SetLatch(&proc->procLatch);
	}
}