proc.c 54.7 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/lmgr/proc.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * Interface (a):
 *		ProcSleep(), ProcWakeup(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocating memory
 *
 * Waiting for a lock causes the backend to be put to sleep.  Whoever releases
 * the lock wakes the process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with current transaction
 *
 * ProcKill -- destroys the shared memory state (and locks)
 * associated with the process.
 */
32 33
#include "postgres.h"

34
#include <signal.h>
35 36
#include <unistd.h>
#include <sys/time.h>
M
Marc G. Fournier 已提交
37

38
#include "access/transam.h"
39
#include "access/twophase.h"
40
#include "access/xact.h"
41
#include "miscadmin.h"
42
#include "postmaster/autovacuum.h"
43
#include "replication/syncrep.h"
44
#include "storage/ipc.h"
45
#include "storage/lmgr.h"
46
#include "storage/pmsignal.h"
47
#include "storage/proc.h"
48
#include "storage/procarray.h"
49
#include "storage/procsignal.h"
50
#include "storage/spin.h"
51
#include "utils/timestamp.h"
52

53

54
/* GUC variables */
B
Bruce Momjian 已提交
55
int			DeadlockTimeout = 1000;
56
int			StatementTimeout = 0;
57
bool		log_lock_waits = false;
M
 
Marc G. Fournier 已提交
58

59
/* Pointer to this process's PGPROC struct, if any */
J
Jan Wieck 已提交
60
PGPROC	   *MyProc = NULL;
61
PGXACT	   *MyPgXact = NULL;
62 63

/*
J
Jan Wieck 已提交
64
 * This spinlock protects the freelist of recycled PGPROC structures.
65
 * We cannot use an LWLock because the LWLock manager depends on already
J
Jan Wieck 已提交
66
 * having a PGPROC and a wait semaphore!  But these structures are touched
67 68
 * relatively infrequently (only at backend startup or shutdown) and not for
 * very long, so a spinlock is okay.
69
 */
70
NON_EXEC_STATIC slock_t *ProcStructLock = NULL;
71

72
/* Pointers to shared-memory structures */
73
PROC_HDR *ProcGlobal = NULL;
74
NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL;
75
PGPROC *PreparedXactProcs = NULL;
76

77 78
/* If we are waiting for a lock, this points to the associated LOCALLOCK */
static LOCALLOCK *lockAwaited = NULL;
79

80
/* Mark these volatile because they can be changed by signal handler */
81
static volatile bool standby_timeout_active = false;
82 83
static volatile bool statement_timeout_active = false;
static volatile bool deadlock_timeout_active = false;
84
static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
85
volatile bool cancel_from_timeout = false;
B
Bruce Momjian 已提交
86

87 88 89
/* timeout_start_time is set when log_lock_waits is true */
static TimestampTz timeout_start_time;

90
/* statement_fin_time is valid only if statement_timeout_active is true */
91
static TimestampTz statement_fin_time;
92
static TimestampTz statement_fin_time2; /* valid only in recovery */
93 94


95
static void RemoveProcFromArray(int code, Datum arg);
96
static void ProcKill(int code, Datum arg);
97
static void AuxiliaryProcKill(int code, Datum arg);
98
static bool CheckStatementTimeout(void);
99
static bool CheckStandbyTimeout(void);
100

V
Vadim B. Mikheev 已提交
101

102 103 104
/*
 * Report shared-memory space needed by InitProcGlobal.
 */
105
Size
106
ProcGlobalShmemSize(void)
107
{
108 109 110 111
	Size		size = 0;

	/* ProcGlobal */
	size = add_size(size, sizeof(PROC_HDR));
112
	/* MyProcs, including autovacuum workers and launcher */
113
	size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC)));
114 115 116 117
	/* AuxiliaryProcs */
	size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC)));
	/* Prepared xacts */
	size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGPROC)));
118 119
	/* ProcStructLock */
	size = add_size(size, sizeof(slock_t));
120

121 122 123 124
	size = add_size(size, mul_size(MaxBackends, sizeof(PGXACT)));
	size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGXACT)));
	size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGXACT)));

125 126 127
	return size;
}

128 129 130 131
/*
 * Report number of semaphores needed by InitProcGlobal.
 */
int
132
ProcGlobalSemas(void)
133
{
134 135 136 137
	/*
	 * We need a sema per backend (including autovacuum), plus one for each
	 * auxiliary process.
	 */
138
	return MaxBackends + NUM_AUXILIARY_PROCS;
139 140
}

141 142
/*
 * InitProcGlobal -
143 144
 *	  Initialize the global process table during postmaster or standalone
 *	  backend startup.
145
 *
146
 *	  We also create all the per-process semaphores we will need to support
147 148 149 150 151 152 153
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
154 155
 *	  MaxConnections or autovacuum_max_workers higher than his kernel will
 *	  support, he'll find out sooner rather than later.
156 157 158 159
 *
 *	  Another reason for creating semaphores here is that the semaphore
 *	  implementation typically requires us to create semaphores in the
 *	  postmaster, not in backends.
160 161
 *
 * Note: this is NOT called by individual backends under a postmaster,
162
 * not even in the EXEC_BACKEND case.  The ProcGlobal and AuxiliaryProcs
163
 * pointers must be propagated specially for EXEC_BACKEND operation.
164 165
 */
void
166
InitProcGlobal(void)
167
{
168
	PGPROC	   *procs;
169
	PGXACT	   *pgxacts;
170 171
	int			i,
				j;
172
	bool		found;
173
	uint32		TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts;
174

175
	/* Create the ProcGlobal shared structure */
176
	ProcGlobal = (PROC_HDR *)
177 178
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
	Assert(!found);
179

180 181 182
	/*
	 * Initialize the data structures.
	 */
R
Robert Haas 已提交
183
	ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;
184 185
	ProcGlobal->freeProcs = NULL;
	ProcGlobal->autovacFreeProcs = NULL;
186 187 188
	ProcGlobal->startupProc = NULL;
	ProcGlobal->startupProcPid = 0;
	ProcGlobal->startupBufferPinWaitBufId = -1;
189

190
	/*
R
Robert Haas 已提交
191 192 193
	 * Create and initialize all the PGPROC structures we'll need (except for
	 * those used for 2PC, which are embedded within a GlobalTransactionData
	 * struct).
194
	 *
195 196 197 198 199
	 * There are four separate consumers of PGPROC structures: (1) normal
	 * backends, (2) autovacuum workers and the autovacuum launcher, (3)
	 * auxiliary processes, and (4) prepared transactions.  Each PGPROC
	 * structure is dedicated to exactly one of these purposes, and they do
	 * not move between groups.
200
	 */
R
Robert Haas 已提交
201
	procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC));
202 203
	ProcGlobal->allProcs = procs;
	ProcGlobal->allProcCount = TotalProcs;
204 205 206 207
	if (!procs)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
R
Robert Haas 已提交
208
	MemSet(procs, 0, TotalProcs * sizeof(PGPROC));
209 210 211 212 213 214 215 216 217 218 219 220 221

	/*
	 * Also allocate a separate array of PGXACT structures.  This is separate
	 * from the main PGPROC array so that the most heavily accessed data is
	 * stored contiguously in memory in as few cache lines as possible. This
	 * provides significant performance benefits, especially on a
	 * multiprocessor system.  Thereis one PGXACT structure for every PGPROC
	 * structure.
	 */
	pgxacts = (PGXACT *) ShmemAlloc(TotalProcs * sizeof(PGXACT));
	MemSet(pgxacts, 0, TotalProcs * sizeof(PGXACT));
	ProcGlobal->allPgXact = pgxacts;

R
Robert Haas 已提交
222
	for (i = 0; i < TotalProcs; i++)
223
	{
R
Robert Haas 已提交
224
		/* Common initialization for all PGPROCs, regardless of type. */
225

226 227 228 229 230 231 232 233 234 235 236 237
		/*
		 * Set up per-PGPROC semaphore, latch, and backendLock. Prepared
		 * xact dummy PGPROCs don't need these though - they're never
		 * associated with a real process
		 */
		if (i < MaxBackends + NUM_AUXILIARY_PROCS)
		{
			PGSemaphoreCreate(&(procs[i].sem));
			InitSharedLatch(&(procs[i].procLatch));
			procs[i].backendLock = LWLockAssign();
		}
		procs[i].pgprocno = i;
R
Robert Haas 已提交
238 239 240 241 242 243

		/*
		 * Newly created PGPROCs for normal backends or for autovacuum must
		 * be queued up on the appropriate free list.  Because there can only
		 * ever be a small, fixed number of auxiliary processes, no free
		 * list is used in that case; InitAuxiliaryProcess() instead uses a
244 245
		 * linear search.  PGPROCs for prepared transactions are added to a
		 * free list by TwoPhaseShmemInit().
R
Robert Haas 已提交
246 247 248 249 250 251 252 253 254 255 256 257 258
		 */
		if (i < MaxConnections)
		{
			/* PGPROC for normal backend, add to freeProcs list */
			procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs;
			ProcGlobal->freeProcs = &procs[i];
		}
		else if (i < MaxBackends)
		{
			/* PGPROC for AV launcher/worker, add to autovacFreeProcs list */
			procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs;
			ProcGlobal->autovacFreeProcs = &procs[i];
		}
259 260 261 262

		/* Initialize myProcLocks[] shared memory queues. */
		for (j = 0; j < NUM_LOCK_PARTITIONS; j++)
			SHMQueueInit(&(procs[i].myProcLocks[j]));
263 264
	}

265
	/*
266 267
	 * Save pointers to the blocks of PGPROC structures reserved for
	 * auxiliary processes and prepared transactions.
268
	 */
R
Robert Haas 已提交
269
	AuxiliaryProcs = &procs[MaxBackends];
270
	PreparedXactProcs = &procs[MaxBackends + NUM_AUXILIARY_PROCS];
271 272 273 274

	/* Create ProcStructLock spinlock, too */
	ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
	SpinLockInit(ProcStructLock);
275 276
}

277
/*
278
 * InitProcess -- initialize a per-process data structure for this backend
279 280
 */
void
281
InitProcess(void)
282
{
283 284
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
285 286

	/*
287 288
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
289
	 */
290
	if (procglobal == NULL)
291
		elog(PANIC, "proc header uninitialized");
292 293

	if (MyProc != NULL)
294
		elog(ERROR, "you already exist");
295

296
	/*
B
Bruce Momjian 已提交
297 298
	 * Try to get a proc struct from the free list.  If this fails, we must be
	 * out of PGPROC structures (not to mention semaphores).
299
	 *
B
Bruce Momjian 已提交
300 301
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
302
	 */
303
	SpinLockAcquire(ProcStructLock);
304

305 306
	set_spins_per_delay(procglobal->spins_per_delay);

307
	if (IsAnyAutoVacuumProcess())
308
		MyProc = procglobal->autovacFreeProcs;
309
	else
310
		MyProc = procglobal->freeProcs;
311

312
	if (MyProc != NULL)
313
	{
314
		if (IsAnyAutoVacuumProcess())
315
			procglobal->autovacFreeProcs = (PGPROC *) MyProc->links.next;
316
		else
317
			procglobal->freeProcs = (PGPROC *) MyProc->links.next;
318
		SpinLockRelease(ProcStructLock);
319 320 321 322
	}
	else
	{
		/*
B
Bruce Momjian 已提交
323 324
		 * If we reach here, all the PGPROCs are in use.  This is one of the
		 * possible places to detect "too many backends", so give the standard
325 326
		 * error message.  XXX do we need to give a different failure message
		 * in the autovacuum case?
327
		 */
328
		SpinLockRelease(ProcStructLock);
329 330 331
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("sorry, too many clients already")));
332
	}
333
	MyPgXact = &ProcGlobal->allPgXact[MyProc->pgprocno];
334

335 336
	/*
	 * Now that we have a PGPROC, mark ourselves as an active postmaster
337
	 * child; this is so that the postmaster can detect it if we exit without
338 339
	 * cleaning up.  (XXX autovac launcher currently doesn't participate in
	 * this; it probably should.)
340
	 */
341
	if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess())
342
		MarkPostmasterChildActive();
343

344
	/*
345 346
	 * Initialize all fields of MyProc, except for those previously initialized
	 * by InitProcGlobal.
347
	 */
348
	SHMQueueElemInit(&(MyProc->links));
349
	MyProc->waitStatus = STATUS_OK;
350
	MyProc->lxid = InvalidLocalTransactionId;
351 352
	MyPgXact->xid = InvalidTransactionId;
	MyPgXact->xmin = InvalidTransactionId;
353
	MyProc->pid = MyProcPid;
354 355
	/* backendId, databaseId and roleId will be filled in later */
	MyProc->backendId = InvalidBackendId;
356
	MyProc->databaseId = InvalidOid;
357
	MyProc->roleId = InvalidOid;
358 359
	MyPgXact->inCommit = false;
	MyPgXact->vacuumFlags = 0;
360
	/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
361
	if (IsAutoVacuumWorkerProcess())
362
		MyPgXact->vacuumFlags |= PROC_IS_AUTOVACUUM;
363 364 365
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
366
	MyProc->waitLock = NULL;
367
	MyProc->waitProcLock = NULL;
368 369 370 371 372 373 374 375 376 377
#ifdef USE_ASSERT_CHECKING
	if (assert_enabled)
	{
		int i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif
378
	MyProc->recoveryConflictPending = false;
379

380
	/* Initialize fields for sync rep */
381 382 383 384
	MyProc->waitLSN.xlogid = 0;
	MyProc->waitLSN.xrecoff = 0;
	MyProc->syncRepState = SYNC_REP_NOT_WAITING;
	SHMQueueElemInit(&(MyProc->syncRepLinks));
385 386 387 388 389 390

	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch.
	 * Note that there's no particular need to do ResetLatch here.
	 */
	OwnLatch(&MyProc->procLatch);
391

392
	/*
393
	 * We might be reusing a semaphore that belonged to a failed process. So
B
Bruce Momjian 已提交
394
	 * be careful and reinitialize its value here.	(This is not strictly
395
	 * necessary anymore, but seems like a good idea for cleanliness.)
396
	 */
397
	PGSemaphoreReset(&MyProc->sem);
398

399
	/*
400
	 * Arrange to clean up at backend exit.
401
	 */
402
	on_shmem_exit(ProcKill, 0);
403 404

	/*
B
Bruce Momjian 已提交
405 406
	 * Now that we have a PGPROC, we could try to acquire locks, so initialize
	 * the deadlock checker.
407 408
	 */
	InitDeadLockChecking();
409 410
}

411 412 413 414
/*
 * InitProcessPhase2 -- make MyProc visible in the shared ProcArray.
 *
 * This step is kept apart from InitProcess because LWLocks cannot be
 * acquired until a PGPROC exists, while in the EXEC_BACKEND case
 * ProcArrayAdd won't work until after CreateSharedMemoryAndSemaphores has
 * been done.
 */
void
InitProcessPhase2(void)
{
	/* InitProcess must already have given us a PGPROC */
	Assert(MyProc != NULL);

	/* Enter our PGPROC into the shared-memory PGPROC array ... */
	ProcArrayAdd(MyProc);

	/* ... and make sure it is taken out again at backend exit. */
	on_shmem_exit(RemoveProcFromArray, 0);
}

434
/*
435
 * InitAuxiliaryProcess -- create a per-auxiliary-process data structure
436
 *
437 438
 * This is called by bgwriter and similar processes so that they will have a
 * MyProc value that's real enough to let them wait for LWLocks.  The PGPROC
439
 * and sema that are assigned are one of the extra ones created during
440
 * InitProcGlobal.
441
 *
442
 * Auxiliary processes are presently not expected to wait for real (lockmgr)
443
 * locks, so we need not set up the deadlock checker.  They are never added
B
Bruce Momjian 已提交
444
 * to the ProcArray or the sinval messaging mechanism, either.	They also
445 446
 * don't get a VXID assigned, since this is only useful when we actually
 * hold lockmgr locks.
447 448 449 450 451
 *
 * Startup process however uses locks but never waits for them in the
 * normal backend sense. Startup process also takes part in sinval messaging
 * as a sendOnly process, so never reads messages from sinval queue. So
 * Startup process does have a VXID and does show up in pg_locks.
452 453
 */
void
454
InitAuxiliaryProcess(void)
455
{
456
	PGPROC	   *auxproc;
457
	int			proctype;
J
Jan Wieck 已提交
458

459
	/*
460 461
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
462
	 */
463
	if (ProcGlobal == NULL || AuxiliaryProcs == NULL)
464
		elog(PANIC, "proc header uninitialized");
465 466

	if (MyProc != NULL)
467
		elog(ERROR, "you already exist");
468

469
	/*
470
	 * We use the ProcStructLock to protect assignment and releasing of
471
	 * AuxiliaryProcs entries.
472
	 *
B
Bruce Momjian 已提交
473 474
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
475 476 477 478 479
	 */
	SpinLockAcquire(ProcStructLock);

	set_spins_per_delay(ProcGlobal->spins_per_delay);

480
	/*
481
	 * Find a free auxproc ... *big* trouble if there isn't one ...
482
	 */
483
	for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++)
484
	{
485 486
		auxproc = &AuxiliaryProcs[proctype];
		if (auxproc->pid == 0)
487 488
			break;
	}
489
	if (proctype >= NUM_AUXILIARY_PROCS)
490 491
	{
		SpinLockRelease(ProcStructLock);
492
		elog(FATAL, "all AuxiliaryProcs are in use");
493
	}
494

495
	/* Mark auxiliary proc as in use by me */
496
	/* use volatile pointer to prevent code rearrangement */
497
	((volatile PGPROC *) auxproc)->pid = MyProcPid;
498

499
	MyProc = auxproc;
500
	MyPgXact = &ProcGlobal->allPgXact[auxproc->pgprocno];
501 502 503

	SpinLockRelease(ProcStructLock);

504
	/*
505 506
	 * Initialize all fields of MyProc, except for those previously initialized
	 * by InitProcGlobal.
507 508
	 */
	SHMQueueElemInit(&(MyProc->links));
509
	MyProc->waitStatus = STATUS_OK;
510
	MyProc->lxid = InvalidLocalTransactionId;
511 512
	MyPgXact->xid = InvalidTransactionId;
	MyPgXact->xmin = InvalidTransactionId;
513
	MyProc->backendId = InvalidBackendId;
514
	MyProc->databaseId = InvalidOid;
515
	MyProc->roleId = InvalidOid;
516 517
	MyPgXact->inCommit = false;
	MyPgXact->vacuumFlags = 0;
518 519 520 521
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
	MyProc->waitLock = NULL;
522
	MyProc->waitProcLock = NULL;
523 524 525 526 527 528 529 530 531 532
#ifdef USE_ASSERT_CHECKING
	if (assert_enabled)
	{
		int i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif
533

534 535 536 537 538 539
	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch.
	 * Note that there's no particular need to do ResetLatch here.
	 */
	OwnLatch(&MyProc->procLatch);

540
	/*
B
Bruce Momjian 已提交
541
	 * We might be reusing a semaphore that belonged to a failed process. So
B
Bruce Momjian 已提交
542
	 * be careful and reinitialize its value here.	(This is not strictly
543
	 * necessary anymore, but seems like a good idea for cleanliness.)
544
	 */
545
	PGSemaphoreReset(&MyProc->sem);
546 547 548 549

	/*
	 * Arrange to clean up at process exit.
	 */
550
	on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype));
551 552
}

553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
/*
 * Record the PID and PGPROC structures for the Startup process, for use in
 * ProcSendSignal().  See comments there for further explanation.
 */
void
PublishStartupProcessInformation(void)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

	SpinLockAcquire(ProcStructLock);

	procglobal->startupProc = MyProc;
	procglobal->startupProcPid = MyProcPid;

	SpinLockRelease(ProcStructLock);
}

571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
/*
 * Used from bufgr to share the value of the buffer that Startup waits on,
 * or to reset the value to "not waiting" (-1). This allows processing
 * of recovery conflicts for buffer pins. Set is made before backends look
 * at this value, so locking not required, especially since the set is
 * an atomic integer set operation.
 */
void
SetStartupBufferPinWaitBufId(int bufid)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

	procglobal->startupBufferPinWaitBufId = bufid;
}

/*
 * Used by backends when they receive a request to check for buffer pin waits.
 */
int
GetStartupBufferPinWaitBufId(void)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

596
	return procglobal->startupBufferPinWaitBufId;
597 598
}

599 600 601 602 603 604 605 606 607
/*
 * Check whether there are at least N free PGPROC objects.
 *
 * Note: this is designed on the assumption that N will generally be small.
 */
bool
HaveNFreeProcs(int n)
{
	PGPROC	   *proc;
B
Bruce Momjian 已提交
608

609 610 611 612 613
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

	SpinLockAcquire(ProcStructLock);

614
	proc = procglobal->freeProcs;
615

616
	while (n > 0 && proc != NULL)
617
	{
618
		proc = (PGPROC *) proc->links.next;
619 620 621 622 623 624 625 626
		n--;
	}

	SpinLockRelease(ProcStructLock);

	return (n <= 0);
}

627 628 629 630 631 632 633 634 635
/*
 * IsWaitingForLock
 *		True iff lockAwaited is set, i.e. this process has a lock wait
 *		recorded.
 */
bool
IsWaitingForLock(void)
{
	return (lockAwaited != NULL);
}

636 637 638 639
/*
 * Cancel any pending wait for lock, when aborting a transaction.
 *
 * (Normally, this would only happen if we accept a cancel/die
640
 * interrupt while waiting; but an ereport(ERROR) while waiting is
641 642
 * within the realm of possibility, too.)
 */
643
void
644 645
LockWaitCancel(void)
{
646 647
	LWLockId	partitionLock;

648
	/* Nothing to do if we weren't waiting for a lock */
649
	if (lockAwaited == NULL)
650
		return;
651

652
	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
653
	disable_sig_alarm(false);
654 655

	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
656
	partitionLock = LockHashPartitionLock(lockAwaited->hashcode);
657
	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
658

659
	if (MyProc->links.next != NULL)
660 661
	{
		/* We could not have been granted the lock yet */
662
		RemoveFromWaitQueue(MyProc, lockAwaited->hashcode);
663 664 665 666 667
	}
	else
	{
		/*
		 * Somebody kicked us off the lock queue already.  Perhaps they
B
Bruce Momjian 已提交
668 669 670
		 * granted us the lock, or perhaps they detected a deadlock. If they
		 * did grant us the lock, we'd better remember it in our local lock
		 * table.
671
		 */
672 673
		if (MyProc->waitStatus == STATUS_OK)
			GrantAwaitedLock();
674 675
	}

676
	lockAwaited = NULL;
677

678
	LWLockRelease(partitionLock);
H
Hiroshi Inoue 已提交
679

680
	/*
681
	 * We used to do PGSemaphoreReset() here to ensure that our proc's wait
B
Bruce Momjian 已提交
682 683 684 685 686 687
	 * semaphore is reset to zero.	This prevented a leftover wakeup signal
	 * from remaining in the semaphore if someone else had granted us the lock
	 * we wanted before we were able to remove ourselves from the wait-list.
	 * However, now that ProcSleep loops until waitStatus changes, a leftover
	 * wakeup signal isn't harmful, and it seems not worth expending cycles to
	 * get rid of a signal that most likely isn't there.
688
	 */
H
Hiroshi Inoue 已提交
689
}
690

691

692
/*
693
 * ProcReleaseLocks() -- release locks associated with current transaction
694
 *			at main transaction commit or abort
695 696
 *
 * At main transaction commit, we release all locks except session locks.
697
 * At main transaction abort, we release all locks including session locks.
698 699
 *
 * At subtransaction commit, we don't release any locks (so this func is not
700
 * needed at all); we will defer the releasing to the parent transaction.
701
 * At subtransaction abort, we release all locks held by the subtransaction;
702 703
 * this is implemented by retail releasing of the locks under control of
 * the ResourceOwner mechanism.
704 705
 */
void
706
ProcReleaseLocks(bool isCommit)
707
{
708 709
	if (!MyProc)
		return;
710 711 712
	/* If waiting, get off wait queue (should only be needed after error) */
	LockWaitCancel();
	/* Release locks */
713
	LockReleaseAll(DEFAULT_LOCKMETHOD, !isCommit);
714 715 716

	/* Release transaction level advisory locks */
	LockReleaseAll(USER_LOCKMETHOD, false);
717 718 719
}


720 721 722 723 724 725 726
/*
 * RemoveProcFromArray() -- Remove this process from the shared ProcArray.
 *
 * Registered as an on_shmem_exit callback by InitProcessPhase2; neither
 * argument is used.
 */
static void
RemoveProcFromArray(int code, Datum arg)
{
	/* We can only be registered after a PGPROC was assigned */
	Assert(MyProc != NULL);

	ProcArrayRemove(MyProc, InvalidTransactionId);
}

730 731
/*
 * ProcKill() -- Destroy the per-proc data structure for
732
 *		this process. Release any of its held LW locks.
733 734
 */
static void
735
ProcKill(int code, Datum arg)
736
{
737 738 739
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

740
	Assert(MyProc != NULL);
741

742 743 744
	/* Make sure we're out of the sync rep lists */
	SyncRepCleanupAtProcExit();

745 746 747 748 749 750 751 752 753 754 755
#ifdef USE_ASSERT_CHECKING
	if (assert_enabled)
	{
		int i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif

756
	/*
B
Bruce Momjian 已提交
757 758
	 * Release any LW locks I am holding.  There really shouldn't be any, but
	 * it's cheap to check again before we cut the knees off the LWLock
759
	 * facility by releasing our PGPROC ...
760
	 */
761
	LWLockReleaseAll();
762

763 764 765
	/* Release ownership of the process's latch, too */
	DisownLatch(&MyProc->procLatch);

766
	SpinLockAcquire(ProcStructLock);
767

768 769
	/* Return PGPROC structure (and semaphore) to appropriate freelist */
	if (IsAnyAutoVacuumProcess())
770
	{
771 772
		MyProc->links.next = (SHM_QUEUE *) procglobal->autovacFreeProcs;
		procglobal->autovacFreeProcs = MyProc;
773 774 775
	}
	else
	{
776 777
		MyProc->links.next = (SHM_QUEUE *) procglobal->freeProcs;
		procglobal->freeProcs = MyProc;
778
	}
779

J
Jan Wieck 已提交
780
	/* PGPROC struct isn't mine anymore */
781
	MyProc = NULL;
782

783 784 785
	/* Update shared estimate of spins_per_delay */
	procglobal->spins_per_delay = update_spins_per_delay(procglobal->spins_per_delay);

786
	SpinLockRelease(ProcStructLock);
787

788 789
	/*
	 * This process is no longer present in shared memory in any meaningful
B
Bruce Momjian 已提交
790 791
	 * way, so tell the postmaster we've cleaned up acceptably well. (XXX
	 * autovac launcher should be included here someday)
792
	 */
793
	if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess())
794 795
		MarkPostmasterChildInactive();

796 797
	/* wake autovac launcher if needed -- see comments in FreeWorkerInfo */
	if (AutovacuumLauncherPid != 0)
798
		kill(AutovacuumLauncherPid, SIGUSR2);
799 800 801
}

/*
802 803 804
 * AuxiliaryProcKill() -- Cut-down version of ProcKill for auxiliary
 *		processes (bgwriter, etc).	The PGPROC and sema are not released, only
 *		marked as not-in-use.
805 806
 */
static void
807
AuxiliaryProcKill(int code, Datum arg)
808
{
B
Bruce Momjian 已提交
809
	int			proctype = DatumGetInt32(arg);
810
	PGPROC	   *auxproc;
J
Jan Wieck 已提交
811

812
	Assert(proctype >= 0 && proctype < NUM_AUXILIARY_PROCS);
J
Jan Wieck 已提交
813

814
	auxproc = &AuxiliaryProcs[proctype];
J
Jan Wieck 已提交
815

816
	Assert(MyProc == auxproc);
817

818
	/* Release any LW locks I am holding (see notes above) */
819 820
	LWLockReleaseAll();

821 822 823
	/* Release ownership of the process's latch, too */
	DisownLatch(&MyProc->procLatch);

824 825
	SpinLockAcquire(ProcStructLock);

826
	/* Mark auxiliary proc no longer in use */
827 828
	MyProc->pid = 0;

J
Jan Wieck 已提交
829
	/* PGPROC struct isn't mine anymore */
830
	MyProc = NULL;
831 832 833 834 835

	/* Update shared estimate of spins_per_delay */
	ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);

	SpinLockRelease(ProcStructLock);
836 837
}

838

839 840
/*
 * ProcQueue package: routines for putting processes to sleep
841
 *		and  waking them up
842 843 844 845 846
 */

/*
 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
 *
 * Looks the queue up by name with ShmemInitStruct and runs ProcQueueInit
 * on it when it was not found already.
 *
 * Returns: a pointer to the queue
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(const char *name)
{
	bool		already_exists;
	PROC_QUEUE *result;

	result = (PROC_QUEUE *)
		ShmemInitStruct(name, sizeof(PROC_QUEUE), &already_exists);

	/* Only a freshly created queue needs to be initialized */
	if (!already_exists)
		ProcQueueInit(result);

	return result;
}
#endif
866 867 868 869 870

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
871
ProcQueueInit(PROC_QUEUE *queue)
872
{
873 874
	SHMQueueInit(&(queue->links));
	queue->size = 0;
875 876 877 878
}


/*
879
 * ProcSleep -- put a process to sleep on the specified lock
880
 *
881 882
 * Caller must have set MyProc->heldLocks to reflect locks already held
 * on the lockable object by this process (under all XIDs).
883
 *
884
 * The lock table's partition lock must be held at entry, and will be held
885
 * at exit.
886
 *
887
 * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
888
 *
889
 * ASSUME: that no one will fiddle with the queue until after
890
 *		we release the partition lock.
891 892
 *
 * NOTES: The process queue is now a priority queue for locking.
893 894 895
 *
 * P() on the semaphore should put us to sleep.  The process
 * semaphore is normally zero, so when we try to acquire it, we sleep.
896 897
 */
int
898
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
899
{
900 901 902
	LOCKMODE	lockmode = locallock->tag.mode;
	LOCK	   *lock = locallock->lock;
	PROCLOCK   *proclock = locallock->proclock;
903 904
	uint32		hashcode = locallock->hashcode;
	LWLockId	partitionLock = LockHashPartitionLock(hashcode);
905
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
906
	LOCKMASK	myHeldLocks = MyProc->heldLocks;
907
	bool		early_deadlock = false;
B
Bruce Momjian 已提交
908
	bool		allow_autovacuum_cancel = true;
909
	int			myWaitStatus;
J
Jan Wieck 已提交
910
	PGPROC	   *proc;
911
	int			i;
912

913
	/*
914 915
	 * Determine where to add myself in the wait queue.
	 *
916 917 918 919
	 * Normally I should go at the end of the queue.  However, if I already
	 * hold locks that conflict with the request of any previous waiter, put
	 * myself in the queue just in front of the first such waiter. This is not
	 * a necessary step, since deadlock detection would move me to before that
B
Bruce Momjian 已提交
920 921
	 * waiter anyway; but it's relatively cheap to detect such a conflict
	 * immediately, and avoid delaying till deadlock timeout.
922
	 *
923 924
	 * Special case: if I find I should go in front of some waiter, check to
	 * see if I conflict with already-held locks or the requests before that
B
Bruce Momjian 已提交
925 926 927 928
	 * waiter.	If not, then just grant myself the requested lock immediately.
	 * This is the same as the test for immediate grant in LockAcquire, except
	 * we are only considering the part of the wait queue before my insertion
	 * point.
929 930
	 */
	if (myHeldLocks != 0)
V
Vadim B. Mikheev 已提交
931
	{
932
		LOCKMASK	aheadRequests = 0;
933

934
		proc = (PGPROC *) waitQueue->links.next;
935
		for (i = 0; i < waitQueue->size; i++)
V
Vadim B. Mikheev 已提交
936
		{
937
			/* Must he wait for me? */
B
Bruce Momjian 已提交
938
			if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
V
Vadim B. Mikheev 已提交
939
			{
940
				/* Must I wait for him ? */
B
Bruce Momjian 已提交
941
				if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks)
942
				{
943
					/*
B
Bruce Momjian 已提交
944 945 946 947 948
					 * Yes, so we have a deadlock.	Easiest way to clean up
					 * correctly is to call RemoveFromWaitQueue(), but we
					 * can't do that until we are *on* the wait queue. So, set
					 * a flag to check below, and break out of loop.  Also,
					 * record deadlock info for later message.
949
					 */
950
					RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
951 952
					early_deadlock = true;
					break;
953
				}
954
				/* I must go before this waiter.  Check special case. */
B
Bruce Momjian 已提交
955
				if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
956 957 958
					LockCheckConflicts(lockMethodTable,
									   lockmode,
									   lock,
959
									   proclock,
960
									   MyProc) == STATUS_OK)
961
				{
962
					/* Skip the wait and just grant myself the lock. */
963
					GrantLock(lock, proclock, lockmode);
964
					GrantAwaitedLock();
965
					return STATUS_OK;
966 967
				}
				/* Break out of loop to put myself before him */
V
Vadim B. Mikheev 已提交
968
				break;
969
			}
970
			/* Nope, so advance to next waiter */
971
			aheadRequests |= LOCKBIT_ON(proc->waitLockMode);
972
			proc = (PGPROC *) proc->links.next;
V
Vadim B. Mikheev 已提交
973
		}
B
Bruce Momjian 已提交
974

975
		/*
B
Bruce Momjian 已提交
976 977
		 * If we fall out of loop normally, proc points to waitQueue head, so
		 * we will insert at tail of queue as desired.
978
		 */
979 980 981 982
	}
	else
	{
		/* I hold no locks, so I can't push in front of anyone. */
J
Jan Wieck 已提交
983
		proc = (PGPROC *) &(waitQueue->links);
V
Vadim B. Mikheev 已提交
984
	}
985

986
	/*
B
Bruce Momjian 已提交
987
	 * Insert self into queue, ahead of the given proc (or at tail of queue).
988
	 */
989
	SHMQueueInsertBefore(&(proc->links), &(MyProc->links));
B
Bruce Momjian 已提交
990
	waitQueue->size++;
991

992
	lock->waitMask |= LOCKBIT_ON(lockmode);
993

J
Jan Wieck 已提交
994
	/* Set up wait information in PGPROC object, too */
995
	MyProc->waitLock = lock;
996
	MyProc->waitProcLock = proclock;
997 998
	MyProc->waitLockMode = lockmode;

999
	MyProc->waitStatus = STATUS_WAITING;
1000 1001

	/*
B
Bruce Momjian 已提交
1002 1003 1004
	 * If we detected deadlock, give up without waiting.  This must agree with
	 * CheckDeadLock's recovery code, except that we shouldn't release the
	 * semaphore since we haven't tried to lock it yet.
1005 1006 1007
	 */
	if (early_deadlock)
	{
1008
		RemoveFromWaitQueue(MyProc, hashcode);
1009 1010
		return STATUS_ERROR;
	}
1011

1012
	/* mark that we are waiting for a lock */
1013
	lockAwaited = locallock;
1014

1015
	/*
1016
	 * Release the lock table's partition lock.
1017
	 *
1018
	 * NOTE: this may also cause us to exit critical-section state, possibly
B
Bruce Momjian 已提交
1019 1020
	 * allowing a cancel/die interrupt to be accepted. This is OK because we
	 * have recorded the fact that we are waiting for a lock, and so
1021
	 * LockWaitCancel will clean up if cancel/die happens.
1022
	 */
1023
	LWLockRelease(partitionLock);
1024

1025 1026 1027 1028 1029 1030 1031 1032 1033
	/*
	 * Also, now that we will successfully clean up after an ereport, it's
	 * safe to check to see if there's a buffer pin deadlock against the
	 * Startup process.  Of course, that's only necessary if we're doing
	 * Hot Standby and are not the Startup process ourselves.
	 */
	if (RecoveryInProgress() && !InRecovery)
		CheckRecoveryConflictDeadlock();

1034 1035 1036
	/* Reset deadlock_state before enabling the signal handler */
	deadlock_state = DS_NOT_YET_CHECKED;

1037
	/*
B
Bruce Momjian 已提交
1038 1039 1040 1041
	 * Set timer so we can wake up after awhile and check for a deadlock. If a
	 * deadlock is detected, the handler releases the process's semaphore and
	 * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we
	 * must report failure rather than success.
1042
	 *
1043 1044
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
1045
	 */
1046
	if (!enable_sig_alarm(DeadlockTimeout, false))
1047
		elog(FATAL, "could not set timer for process wakeup");
1048

1049
	/*
1050
	 * If someone wakes us between LWLockRelease and PGSemaphoreLock,
B
Bruce Momjian 已提交
1051
	 * PGSemaphoreLock will not block.	The wakeup is "saved" by the semaphore
B
Bruce Momjian 已提交
1052 1053 1054 1055 1056
	 * implementation.	While this is normally good, there are cases where a
	 * saved wakeup might be leftover from a previous operation (for example,
	 * we aborted ProcWaitForSignal just before someone did ProcSendSignal).
	 * So, loop to wait again if the waitStatus shows we haven't been granted
	 * nor denied the lock yet.
1057
	 *
1058 1059 1060 1061 1062 1063 1064
	 * We pass interruptOK = true, which eliminates a window in which
	 * cancel/die interrupts would be held off undesirably.  This is a promise
	 * that we don't mind losing control to a cancel/die interrupt here.  We
	 * don't, because we have no shared-state-change work to do after being
	 * granted the lock (the grantor did it all).  We do have to worry about
	 * updating the locallock table, but if we lose control to an error,
	 * LockWaitCancel will fix that up.
1065
	 */
B
Bruce Momjian 已提交
1066 1067
	do
	{
1068
		PGSemaphoreLock(&MyProc->sem, true);
1069

1070 1071
		/*
		 * waitStatus could change from STATUS_WAITING to something else
B
Bruce Momjian 已提交
1072
		 * asynchronously.	Read it just once per loop to prevent surprising
1073 1074 1075 1076
		 * behavior (such as missing log messages).
		 */
		myWaitStatus = MyProc->waitStatus;

1077 1078
		/*
		 * If we are not deadlocked, but are waiting on an autovacuum-induced
B
Bruce Momjian 已提交
1079
		 * task, send a signal to interrupt it.
1080 1081 1082
		 */
		if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel)
		{
B
Bruce Momjian 已提交
1083
			PGPROC	   *autovac = GetBlockingAutoVacuumPgproc();
1084
			PGXACT	   *autovac_pgxact = &ProcGlobal->allPgXact[autovac->pgprocno];
1085 1086 1087 1088 1089 1090 1091 1092

			LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

			/*
			 * Only do it if the worker is not working to protect against Xid
			 * wraparound.
			 */
			if ((autovac != NULL) &&
1093 1094
				(autovac_pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
				!(autovac_pgxact->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND))
1095
			{
B
Bruce Momjian 已提交
1096
				int			pid = autovac->pid;
1097

P
Peter Eisentraut 已提交
1098
				elog(DEBUG2, "sending cancel to blocking autovacuum PID %d",
1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
					 pid);

				/* don't hold the lock across the kill() syscall */
				LWLockRelease(ProcArrayLock);

				/* send the autovacuum worker Back to Old Kent Road */
				if (kill(pid, SIGINT) < 0)
				{
					/* Just a warning to allow multiple callers */
					ereport(WARNING,
							(errmsg("could not send signal to process %d: %m",
									pid)));
				}
			}
			else
				LWLockRelease(ProcArrayLock);

			/* prevent signal from being resent more than once */
			allow_autovacuum_cancel = false;
		}

1120 1121 1122 1123
		/*
		 * If awoken after the deadlock check interrupt has run, and
		 * log_lock_waits is on, then report about the wait.
		 */
1124
		if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED)
1125
		{
1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143
			StringInfoData buf;
			const char *modename;
			long		secs;
			int			usecs;
			long		msecs;

			initStringInfo(&buf);
			DescribeLockTag(&buf, &locallock->tag.lock);
			modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
									   lockmode);
			TimestampDifference(timeout_start_time, GetCurrentTimestamp(),
								&secs, &usecs);
			msecs = secs * 1000 + usecs / 1000;
			usecs = usecs % 1000;

			if (deadlock_state == DS_SOFT_DEADLOCK)
				ereport(LOG,
						(errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms",
B
Bruce Momjian 已提交
1144
							  MyProcPid, modename, buf.data, msecs, usecs)));
1145
			else if (deadlock_state == DS_HARD_DEADLOCK)
1146
			{
1147
				/*
B
Bruce Momjian 已提交
1148 1149 1150 1151
				 * This message is a bit redundant with the error that will be
				 * reported subsequently, but in some cases the error report
				 * might not make it to the log (eg, if it's caught by an
				 * exception handler), and we want to ensure all long-wait
1152 1153 1154 1155
				 * events get logged.
				 */
				ereport(LOG,
						(errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms",
B
Bruce Momjian 已提交
1156
							  MyProcPid, modename, buf.data, msecs, usecs)));
1157
			}
1158 1159 1160 1161

			if (myWaitStatus == STATUS_WAITING)
				ereport(LOG,
						(errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
B
Bruce Momjian 已提交
1162
							  MyProcPid, modename, buf.data, msecs, usecs)));
1163 1164
			else if (myWaitStatus == STATUS_OK)
				ereport(LOG,
B
Bruce Momjian 已提交
1165 1166
					(errmsg("process %d acquired %s on %s after %ld.%03d ms",
							MyProcPid, modename, buf.data, msecs, usecs)));
1167 1168 1169
			else
			{
				Assert(myWaitStatus == STATUS_ERROR);
B
Bruce Momjian 已提交
1170

1171 1172
				/*
				 * Currently, the deadlock checker always kicks its own
B
Bruce Momjian 已提交
1173 1174 1175 1176 1177
				 * process, which means that we'll only see STATUS_ERROR when
				 * deadlock_state == DS_HARD_DEADLOCK, and there's no need to
				 * print redundant messages.  But for completeness and
				 * future-proofing, print a message if it looks like someone
				 * else kicked us off the lock.
1178 1179 1180 1181
				 */
				if (deadlock_state != DS_HARD_DEADLOCK)
					ereport(LOG,
							(errmsg("process %d failed to acquire %s on %s after %ld.%03d ms",
B
Bruce Momjian 已提交
1182
							  MyProcPid, modename, buf.data, msecs, usecs)));
1183 1184 1185
			}

			/*
B
Bruce Momjian 已提交
1186 1187
			 * At this point we might still need to wait for the lock. Reset
			 * state so we don't print the above messages again.
1188 1189 1190 1191
			 */
			deadlock_state = DS_NO_DEADLOCK;

			pfree(buf.data);
1192
		}
1193
	} while (myWaitStatus == STATUS_WAITING);
1194

1195
	/*
1196
	 * Disable the timer, if it's still running
B
Bruce Momjian 已提交
1197
	 */
1198
	if (!disable_sig_alarm(false))
1199
		elog(FATAL, "could not disable timer for process wakeup");
B
Bruce Momjian 已提交
1200

1201
	/*
B
Bruce Momjian 已提交
1202 1203 1204
	 * Re-acquire the lock table's partition lock.  We have to do this to hold
	 * off cancel/die interrupts before we can mess with lockAwaited (else we
	 * might have a missed or duplicated locallock update).
1205
	 */
1206
	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
1207 1208 1209

	/*
	 * We no longer want LockWaitCancel to do anything.
1210
	 */
1211
	lockAwaited = NULL;
1212

1213
	/*
1214
	 * If we got the lock, be sure to remember it in the locallock table.
1215
	 */
1216
	if (MyProc->waitStatus == STATUS_OK)
1217
		GrantAwaitedLock();
1218

1219 1220 1221 1222
	/*
	 * We don't have to do anything else, because the awaker did all the
	 * necessary update of the lock table and MyProc.
	 */
1223
	return MyProc->waitStatus;
1224 1225 1226 1227 1228 1229
}


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
1230
 *	 Also remove the process from the wait queue and set its links invalid.
1231
 *	 RETURN: the next process in the wait queue.
1232
 *
1233 1234
 * The appropriate lock partition lock must be held by caller.
 *
1235 1236 1237
 * XXX: presently, this code is only used for the "success" case, and only
 * works correctly for that case.  To clean up in failure case, would need
 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
1238
 * Hence, in practice the waitStatus parameter must be STATUS_OK.
1239
 */
J
Jan Wieck 已提交
1240
PGPROC *
1241
ProcWakeup(PGPROC *proc, int waitStatus)
1242
{
J
Jan Wieck 已提交
1243
	PGPROC	   *retProc;
1244

1245
	/* Proc should be sleeping ... */
1246 1247
	if (proc->links.prev == NULL ||
		proc->links.next == NULL)
1248
		return NULL;
1249
	Assert(proc->waitStatus == STATUS_WAITING);
1250

1251
	/* Save next process before we zap the list link */
1252
	retProc = (PGPROC *) proc->links.next;
1253

1254
	/* Remove process from wait queue */
1255
	SHMQueueDelete(&(proc->links));
1256
	(proc->waitLock->waitProcs.size)--;
1257

1258 1259
	/* Clean up process' state and pass it the ok/fail signal */
	proc->waitLock = NULL;
1260
	proc->waitProcLock = NULL;
1261
	proc->waitStatus = waitStatus;
1262

1263
	/* And awaken it */
1264
	PGSemaphoreUnlock(&proc->sem);
1265 1266

	return retProc;
1267 1268 1269 1270
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
1271 1272
 *		released (or a prior waiter is aborted).  Scan all waiters
 *		for lock, waken any that are no longer blocked.
1273 1274
 *
 * The appropriate lock partition lock must be held by caller.
1275
 */
1276
void
1277
ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
1278
{
1279 1280
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
J
Jan Wieck 已提交
1281
	PGPROC	   *proc;
1282
	LOCKMASK	aheadRequests = 0;
M
 
Marc G. Fournier 已提交
1283

1284
	Assert(queue_size >= 0);
1285

1286 1287
	if (queue_size == 0)
		return;
1288

1289
	proc = (PGPROC *) waitQueue->links.next;
1290

1291 1292
	while (queue_size-- > 0)
	{
B
Bruce Momjian 已提交
1293
		LOCKMODE	lockmode = proc->waitLockMode;
M
 
Marc G. Fournier 已提交
1294 1295

		/*
B
Bruce Momjian 已提交
1296 1297
		 * Waken if (a) doesn't conflict with requests of earlier waiters, and
		 * (b) doesn't conflict with already-held locks.
M
 
Marc G. Fournier 已提交
1298
		 */
B
Bruce Momjian 已提交
1299
		if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
1300 1301 1302
			LockCheckConflicts(lockMethodTable,
							   lockmode,
							   lock,
1303
							   proc->waitProcLock,
1304
							   proc) == STATUS_OK)
M
 
Marc G. Fournier 已提交
1305
		{
1306
			/* OK to waken */
1307
			GrantLock(lock, proc->waitProcLock, lockmode);
1308
			proc = ProcWakeup(proc, STATUS_OK);
B
Bruce Momjian 已提交
1309

1310
			/*
B
Bruce Momjian 已提交
1311 1312 1313
			 * ProcWakeup removes proc from the lock's waiting process queue
			 * and returns the next proc in chain; don't use proc's next-link,
			 * because it's been cleared.
1314
			 */
M
 
Marc G. Fournier 已提交
1315
		}
1316
		else
1317
		{
B
Bruce Momjian 已提交
1318
			/*
B
Bruce Momjian 已提交
1319
			 * Cannot wake this guy. Remember his request for later checks.
B
Bruce Momjian 已提交
1320
			 */
1321
			aheadRequests |= LOCKBIT_ON(lockmode);
1322
			proc = (PGPROC *) proc->links.next;
1323
		}
M
 
Marc G. Fournier 已提交
1324
	}
1325 1326

	Assert(waitQueue->size >= 0);
1327 1328
}

1329 1330 1331
/*
 * CheckDeadLock
 *
1332
 * We only get to this routine if we got SIGALRM after DeadlockTimeout
1333 1334
 * while waiting for a lock to be released by some other process.  Look
 * to see if there's a deadlock; if not, just return and continue waiting.
1335
 * (But signal ProcSleep to log a message, if log_lock_waits is true.)
1336 1337
 * If we have a real deadlock, remove ourselves from the lock's wait queue
 * and signal an error to ProcSleep.
1338 1339 1340
 *
 * NB: this is run inside a signal handler, so be very wary about what is done
 * here or in called routines.
1341
 */
1342
static void
1343
CheckDeadLock(void)
1344
{
1345 1346
	int			i;

1347
	/*
B
Bruce Momjian 已提交
1348 1349
	 * Acquire exclusive lock on the entire shared lock data structures. Must
	 * grab LWLocks in partition-number order to avoid LWLock deadlock.
1350 1351 1352 1353 1354 1355
	 *
	 * Note that the deadlock check interrupt had better not be enabled
	 * anywhere that this process itself holds lock partition locks, else this
	 * will wait forever.  Also note that LWLockAcquire creates a critical
	 * section, so that this routine cannot be interrupted by cancel/die
	 * interrupts.
1356
	 */
1357 1358
	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
		LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);
1359

1360
	/*
1361 1362
	 * Check to see if we've been awoken by anyone in the interim.
	 *
1363
	 * If we have, we can return and resume our transaction -- happy day.
1364 1365
	 * Before we are awoken the process releasing the lock grants it to us so
	 * we know that we don't have to wait anymore.
1366
	 *
1367
	 * We check by looking to see if we've been unlinked from the wait queue.
B
Bruce Momjian 已提交
1368
	 * This is quicker than checking our semaphore's state, since no kernel
1369
	 * call is needed, and it is safe because we hold the lock partition lock.
1370
	 */
1371 1372
	if (MyProc->links.prev == NULL ||
		MyProc->links.next == NULL)
1373 1374 1375 1376 1377 1378 1379 1380 1381 1382
		goto check_done;

#ifdef LOCK_DEBUG
	if (Debug_deadlocks)
		DumpAllLocks();
#endif

	/* Run the deadlock check, and set deadlock_state for use by ProcSleep */
	deadlock_state = DeadLockCheck(MyProc);

1383
	if (deadlock_state == DS_HARD_DEADLOCK)
B
Bruce Momjian 已提交
1384
	{
1385 1386 1387
		/*
		 * Oops.  We have a deadlock.
		 *
1388 1389 1390 1391
		 * Get this process out of wait state. (Note: we could do this more
		 * efficiently by relying on lockAwaited, but use this coding to
		 * preserve the flexibility to kill some other transaction than the
		 * one detecting the deadlock.)
1392 1393
		 *
		 * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
1394 1395
		 * ProcSleep will report an error after we return from the signal
		 * handler.
1396 1397 1398
		 */
		Assert(MyProc->waitLock != NULL);
		RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
1399

1400 1401 1402 1403 1404
		/*
		 * Unlock my semaphore so that the interrupted ProcSleep() call can
		 * finish.
		 */
		PGSemaphoreUnlock(&MyProc->sem);
1405

1406
		/*
1407 1408 1409 1410 1411 1412 1413 1414
		 * We're done here.  Transaction abort caused by the error that
		 * ProcSleep will raise will cause any other locks we hold to be
		 * released, thus allowing other processes to wake up; we don't need
		 * to do that here.  NOTE: an exception is that releasing locks we
		 * hold doesn't consider the possibility of waiters that were blocked
		 * behind us on the lock we just failed to get, and might now be
		 * wakable because we're not in front of them anymore.  However,
		 * RemoveFromWaitQueue took care of waking up any such processes.
1415 1416
		 */
	}
1417
	else if (log_lock_waits || deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
1418 1419 1420 1421
	{
		/*
		 * Unlock my semaphore so that the interrupted ProcSleep() call can
		 * print the log message (we daren't do it here because we are inside
B
Bruce Momjian 已提交
1422 1423
		 * a signal handler).  It will then sleep again until someone releases
		 * the lock.
1424 1425
		 *
		 * If blocked by autovacuum, this wakeup will enable ProcSleep to send
1426
		 * the canceling signal to the autovacuum worker.
1427 1428 1429
		 */
		PGSemaphoreUnlock(&MyProc->sem);
	}
1430 1431

	/*
B
Bruce Momjian 已提交
1432 1433 1434 1435 1436
	 * And release locks.  We do this in reverse order for two reasons: (1)
	 * Anyone else who needs more than one of the locks will be trying to lock
	 * them in increasing order; we don't want to release the other process
	 * until it can get all the locks it needs. (2) This avoids O(N^2)
	 * behavior inside LWLockRelease.
1437
	 */
1438
check_done:
B
Bruce Momjian 已提交
1439
	for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
1440
		LWLockRelease(FirstLockMgrLock + i);
1441 1442 1443
}


1444 1445 1446 1447 1448 1449
/*
 * ProcWaitForSignal - wait for a signal from another backend.
 *
 * This can share the semaphore normally used for waiting for locks,
 * since a backend could never be waiting for a lock and a signal at
 * the same time.  As with locks, it's OK if the signal arrives just
B
Bruce Momjian 已提交
1450
 * before we actually reach the waiting state.	Also as with locks,
1451 1452
 * it's necessary that the caller be robust against bogus wakeups:
 * always check that the desired state has occurred, and wait again
B
Bruce Momjian 已提交
1453
 * if not.	This copes with possible "leftover" wakeups.
1454 1455 1456 1457
 */
void
ProcWaitForSignal(void)
{
1458
	PGSemaphoreLock(&MyProc->sem, true);
1459 1460 1461
}

/*
1462
 * ProcSendSignal - send a signal to a backend identified by PID
1463 1464
 */
void
1465
ProcSendSignal(int pid)
1466
{
1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477
	PGPROC	   *proc = NULL;

	if (RecoveryInProgress())
	{
		/* use volatile pointer to prevent code rearrangement */
		volatile PROC_HDR *procglobal = ProcGlobal;

		SpinLockAcquire(ProcStructLock);

		/*
		 * Check to see whether it is the Startup process we wish to signal.
B
Bruce Momjian 已提交
1478 1479
		 * This call is made by the buffer manager when it wishes to wake up a
		 * process that has been waiting for a pin in so it can obtain a
1480
		 * cleanup lock using LockBufferForCleanup(). Startup is not a normal
B
Bruce Momjian 已提交
1481 1482
		 * backend, so BackendPidGetProc() will not return any pid at all. So
		 * we remember the information for this special case.
1483 1484 1485 1486 1487 1488 1489 1490 1491
		 */
		if (pid == procglobal->startupProcPid)
			proc = procglobal->startupProc;

		SpinLockRelease(ProcStructLock);
	}

	if (proc == NULL)
		proc = BackendPidGetProc(pid);
1492 1493

	if (proc != NULL)
1494
		PGSemaphoreUnlock(&proc->sem);
1495 1496 1497
}


1498 1499 1500 1501 1502 1503 1504 1505 1506
/*****************************************************************************
 * SIGALRM interrupt support
 *
 * Maybe these should be in pqsignal.c?
 *****************************************************************************/

/*
 * Enable the SIGALRM interrupt to fire after the specified delay
 *
1507
 * Delay is given in milliseconds.	Caller should be sure a SIGALRM
1508 1509
 * signal handler is installed before this is called.
 *
1510 1511
 * This code properly handles nesting of deadlock timeout alarms within
 * statement timeout alarms.
1512
 *
1513 1514 1515
 * Returns TRUE if okay, FALSE on failure.
 */
bool
1516
enable_sig_alarm(int delayms, bool is_statement_timeout)
1517
{
1518
	TimestampTz fin_time;
1519
	struct itimerval timeval;
1520

1521 1522
	if (is_statement_timeout)
	{
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532
		/*
		 * Begin statement-level timeout
		 *
		 * Note that we compute statement_fin_time with reference to the
		 * statement_timestamp, but apply the specified delay without any
		 * correction; that is, we ignore whatever time has elapsed since
		 * statement_timestamp was set.  In the normal case only a small
		 * interval will have elapsed and so this doesn't matter, but there
		 * are corner cases (involving multi-statement query strings with
		 * embedded COMMIT or ROLLBACK) where we might re-initialize the
B
Bruce Momjian 已提交
1533 1534 1535 1536
		 * statement timeout long after initial receipt of the message. In
		 * such cases the enforcement of the statement timeout will be a bit
		 * inconsistent.  This annoyance is judged not worth the cost of
		 * performing an additional gettimeofday() here.
1537
		 */
1538
		Assert(!deadlock_timeout_active);
1539 1540
		fin_time = GetCurrentStatementStartTimestamp();
		fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
1541
		statement_fin_time = fin_time;
1542
		cancel_from_timeout = false;
1543
		statement_timeout_active = true;
1544 1545 1546 1547 1548 1549
	}
	else if (statement_timeout_active)
	{
		/*
		 * Begin deadlock timeout with statement-level timeout active
		 *
1550 1551 1552 1553
		 * Here, we want to interrupt at the closer of the two timeout times.
		 * If fin_time >= statement_fin_time then we need not touch the
		 * existing timer setting; else set up to interrupt at the deadlock
		 * timeout time.
1554 1555 1556
		 *
		 * NOTE: in this case it is possible that this routine will be
		 * interrupted by the previously-set timer alarm.  This is okay
B
Bruce Momjian 已提交
1557 1558 1559
		 * because the signal handler will do only what it should do according
		 * to the state variables.	The deadlock checker may get run earlier
		 * than normal, but that does no harm.
1560
		 */
1561 1562
		timeout_start_time = GetCurrentTimestamp();
		fin_time = TimestampTzPlusMilliseconds(timeout_start_time, delayms);
1563
		deadlock_timeout_active = true;
1564
		if (fin_time >= statement_fin_time)
1565 1566 1567 1568 1569 1570
			return true;
	}
	else
	{
		/* Begin deadlock timeout with no statement-level timeout */
		deadlock_timeout_active = true;
1571 1572 1573
		/* GetCurrentTimestamp can be expensive, so only do it if we must */
		if (log_lock_waits)
			timeout_start_time = GetCurrentTimestamp();
1574
	}
1575

1576
	/* If we reach here, okay to set the timer interrupt */
1577
	MemSet(&timeval, 0, sizeof(struct itimerval));
1578 1579
	timeval.it_value.tv_sec = delayms / 1000;
	timeval.it_value.tv_usec = (delayms % 1000) * 1000;
1580
	if (setitimer(ITIMER_REAL, &timeval, NULL))
1581
		return false;
1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602
	return true;
}

/*
 * Cancel the SIGALRM timer, either for a deadlock timeout or a statement
 * timeout.  If a deadlock timeout is canceled, any active statement timeout
 * remains in force.
 *
 * Returns TRUE if okay, FALSE on failure.
 */
bool
disable_sig_alarm(bool is_statement_timeout)
{
	/*
	 * Always disable the interrupt if it is active; this avoids being
	 * interrupted by the signal handler and thereby possibly getting
	 * confused.
	 *
	 * We will re-enable the interrupt if necessary in CheckStatementTimeout.
	 */
	if (statement_timeout_active || deadlock_timeout_active)
	{
		struct itimerval zero_timer;

		MemSet(&zero_timer, 0, sizeof(struct itimerval));
		if (setitimer(ITIMER_REAL, &zero_timer, NULL) != 0)
		{
			/* Could not stop the timer: clear all state and report failure */
			statement_timeout_active = false;
			cancel_from_timeout = false;
			deadlock_timeout_active = false;
			return false;
		}
	}

	/* Always cancel deadlock timeout, in case this is error cleanup */
	deadlock_timeout_active = false;

	/* Cancel the statement timeout outright when that is what was asked */
	if (is_statement_timeout)
	{
		statement_timeout_active = false;
		cancel_from_timeout = false;
		return true;
	}

	/* Otherwise put any still-active statement timeout back on the timer */
	if (statement_timeout_active)
		return CheckStatementTimeout();

	return true;
}

1633

1634
/*
1635 1636 1637
 * Check for statement timeout.  If the timeout time has come,
 * trigger a query-cancel interrupt; if not, reschedule the SIGALRM
 * interrupt to occur at the right time.
1638
 *
1639
 * Returns true if okay, false if failed to set the interrupt.
1640
 */
1641 1642
static bool
CheckStatementTimeout(void)
1643
{
1644
	TimestampTz now;
B
Bruce Momjian 已提交
1645

1646 1647 1648
	if (!statement_timeout_active)
		return true;			/* do nothing if not active */

1649
	now = GetCurrentTimestamp();
1650

1651
	if (now >= statement_fin_time)
1652
	{
1653 1654
		/* Time to die */
		statement_timeout_active = false;
1655
		cancel_from_timeout = true;
1656 1657 1658 1659
#ifdef HAVE_SETSID
		/* try to signal whole process group */
		kill(-MyProcPid, SIGINT);
#endif
1660
		kill(MyProcPid, SIGINT);
1661 1662 1663 1664
	}
	else
	{
		/* Not time yet, so (re)schedule the interrupt */
1665 1666
		long		secs;
		int			usecs;
1667 1668
		struct itimerval timeval;

1669 1670
		TimestampDifference(now, statement_fin_time,
							&secs, &usecs);
B
Bruce Momjian 已提交
1671

1672 1673 1674 1675 1676 1677
		/*
		 * It's possible that the difference is less than a microsecond;
		 * ensure we don't cancel, rather than set, the interrupt.
		 */
		if (secs == 0 && usecs == 0)
			usecs = 1;
1678
		MemSet(&timeval, 0, sizeof(struct itimerval));
1679 1680
		timeval.it_value.tv_sec = secs;
		timeval.it_value.tv_usec = usecs;
1681
		if (setitimer(ITIMER_REAL, &timeval, NULL))
1682 1683 1684
			return false;
	}

1685 1686
	return true;
}
1687 1688 1689


/*
1690
 * Signal handler for SIGALRM for normal user backends
1691 1692 1693 1694 1695
 *
 * Process deadlock check and/or statement timeout check, as needed.
 * To avoid various edge cases, we must be careful to do nothing
 * when there is nothing to be done.  We also need to be able to
 * reschedule the timer interrupt if called before end of statement.
1696 1697 1698 1699
 */
void
handle_sig_alarm(SIGNAL_ARGS)
{
1700 1701
	int			save_errno = errno;

1702 1703 1704 1705
	/* SIGALRM is cause for waking anything waiting on the process latch */
	if (MyProc)
		SetLatch(&MyProc->procLatch);

1706
	if (deadlock_timeout_active)
1707
	{
1708
		deadlock_timeout_active = false;
1709 1710
		CheckDeadLock();
	}
1711 1712 1713 1714 1715

	if (statement_timeout_active)
		(void) CheckStatementTimeout();

	errno = save_errno;
1716
}
1717 1718 1719 1720 1721 1722 1723

/*
 * Signal handler for SIGALRM in Startup process
 *
 * To avoid various edge cases, we must be careful to do nothing
 * when there is nothing to be done.  We also need to be able to
 * reschedule the timer interrupt if called before end of statement.
1724 1725 1726
 *
 * We set either deadlock_timeout_active or statement_timeout_active
 * or both. Interrupts are enabled if standby_timeout_active.
1727 1728
 */
bool
1729
enable_standby_sig_alarm(TimestampTz now, TimestampTz fin_time, bool deadlock_only)
1730
{
1731 1732
	TimestampTz deadlock_time = TimestampTzPlusMilliseconds(now,
															DeadlockTimeout);
1733

1734 1735 1736
	if (deadlock_only)
	{
		/*
1737
		 * Wake up at deadlock_time only, then wait forever
1738 1739 1740 1741 1742 1743 1744 1745
		 */
		statement_fin_time = deadlock_time;
		deadlock_timeout_active = true;
		statement_timeout_active = false;
	}
	else if (fin_time > deadlock_time)
	{
		/*
1746
		 * Wake up at deadlock_time, then again at fin_time
1747 1748 1749 1750 1751 1752 1753 1754 1755
		 */
		statement_fin_time = deadlock_time;
		statement_fin_time2 = fin_time;
		deadlock_timeout_active = true;
		statement_timeout_active = true;
	}
	else
	{
		/*
1756
		 * Wake only at fin_time because its fairly soon
1757 1758 1759 1760 1761
		 */
		statement_fin_time = fin_time;
		deadlock_timeout_active = false;
		statement_timeout_active = true;
	}
1762

1763 1764 1765 1766 1767
	if (deadlock_timeout_active || statement_timeout_active)
	{
		long		secs;
		int			usecs;
		struct itimerval timeval;
B
Bruce Momjian 已提交
1768

1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779
		TimestampDifference(now, statement_fin_time,
							&secs, &usecs);
		if (secs == 0 && usecs == 0)
			usecs = 1;
		MemSet(&timeval, 0, sizeof(struct itimerval));
		timeval.it_value.tv_sec = secs;
		timeval.it_value.tv_usec = usecs;
		if (setitimer(ITIMER_REAL, &timeval, NULL))
			return false;
		standby_timeout_active = true;
	}
1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819

	return true;
}

/*
 * Cancel the standby SIGALRM timer, if one is pending.
 *
 * The interval timer is stopped only when standby_timeout_active says one
 * was set; doing so first avoids being interrupted by the signal handler
 * and thereby possibly getting confused.  Either way the flag is left
 * false on return.
 *
 * Returns true if okay, false if the timer could not be stopped.
 */
bool
disable_standby_sig_alarm(void)
{
	bool		stopped = true;

	if (standby_timeout_active)
	{
		struct itimerval off;

		/* A zeroed itimerval asks setitimer() to stop the timer */
		MemSet(&off, 0, sizeof(struct itimerval));
		if (setitimer(ITIMER_REAL, &off, NULL) != 0)
			stopped = false;
	}

	/* Success or failure, no standby timeout is considered active now */
	standby_timeout_active = false;

	return stopped;
}

/*
 * CheckStandbyTimeout() runs unconditionally in the Startup process
 * SIGALRM handler. Timers will only be set when InHotStandby.
 * We simply ignore any signals unless the timer has been set.
 */
static bool
CheckStandbyTimeout(void)
{
	TimestampTz now;
B
Bruce Momjian 已提交
1820
	bool		reschedule = false;
1821 1822 1823 1824 1825

	standby_timeout_active = false;

	now = GetCurrentTimestamp();

1826
	/*
B
Bruce Momjian 已提交
1827 1828
	 * Reschedule the timer if its not time to wake yet, or if we have both
	 * timers set and the first one has just been reached.
1829
	 */
1830
	if (now >= statement_fin_time)
1831 1832 1833 1834
	{
		if (deadlock_timeout_active)
		{
			/*
1835
			 * We're still waiting when we reach deadlock timeout, so send out
B
Bruce Momjian 已提交
1836 1837
			 * a request to have other backends check themselves for deadlock.
			 * Then continue waiting until statement_fin_time, if that's set.
1838 1839 1840 1841 1842
			 */
			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
			deadlock_timeout_active = false;

			/*
1843
			 * Begin second waiting period if required.
1844 1845 1846 1847 1848 1849 1850 1851 1852 1853
			 */
			if (statement_timeout_active)
			{
				reschedule = true;
				statement_fin_time = statement_fin_time2;
			}
		}
		else
		{
			/*
1854 1855
			 * We've now reached statement_fin_time, so ask all conflicts to
			 * leave, so we can press ahead with applying changes in recovery.
1856 1857 1858 1859
			 */
			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
		}
	}
1860
	else
1861 1862 1863
		reschedule = true;

	if (reschedule)
1864 1865 1866 1867
	{
		long		secs;
		int			usecs;
		struct itimerval timeval;
B
Bruce Momjian 已提交
1868

1869 1870 1871 1872 1873 1874 1875 1876 1877
		TimestampDifference(now, statement_fin_time,
							&secs, &usecs);
		if (secs == 0 && usecs == 0)
			usecs = 1;
		MemSet(&timeval, 0, sizeof(struct itimerval));
		timeval.it_value.tv_sec = secs;
		timeval.it_value.tv_usec = usecs;
		if (setitimer(ITIMER_REAL, &timeval, NULL))
			return false;
1878
		standby_timeout_active = true;
1879 1880 1881 1882 1883 1884 1885 1886
	}

	return true;
}

void
handle_standby_sig_alarm(SIGNAL_ARGS)
{
B
Bruce Momjian 已提交
1887
	int			save_errno = errno;
1888 1889 1890 1891 1892 1893

	if (standby_timeout_active)
		(void) CheckStandbyTimeout();

	errno = save_errno;
}