/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.124 2002/07/19 00:17:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * Interface (a):
 *		ProcSleep(), ProcWakeup(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocating memory
 *
 * Locking and waiting for buffers can cause the backend to be
 * put to sleep.  Whoever releases the lock, etc. wakes the
 * process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with current transaction
 *
 * ProcKill -- destroys the shared memory state (and locks)
 *		associated with the process.
 *
 * 5/15/91 -- removed the buffer pool based lock chain in favor
 *		of a shared memory lock chain.	The write-protection is
 *		more expensive if the lock chain is in the buffer pool.
 *		The only reason I kept the lock chain in the buffer pool
 *		in the first place was to allow the lock table to grow larger
 *		than available shared memory and that isn't going to work
 *		without a lot of unimplemented support anyway.
 */
41 42
#include "postgres.h"

43
#include <errno.h>
44
#include <signal.h>
45 46
#include <unistd.h>
#include <sys/time.h>
M
Marc G. Fournier 已提交
47

48
#include "miscadmin.h"
49
#include "access/xact.h"
50
#include "storage/ipc.h"
51
#include "storage/proc.h"
52
#include "storage/sinval.h"
53
#include "storage/spin.h"
54

B
Bruce Momjian 已提交
55
int			DeadlockTimeout = 1000;
56 57 58
int			StatementTimeout = 0;
int			RemainingStatementTimeout = 0;
bool		alarm_is_statement_timeout = false;
M
 
Marc G. Fournier 已提交
59

J
Jan Wieck 已提交
60
PGPROC	   *MyProc = NULL;
61 62

/*
J
Jan Wieck 已提交
63
 * This spinlock protects the freelist of recycled PGPROC structures.
64
 * We cannot use an LWLock because the LWLock manager depends on already
J
Jan Wieck 已提交
65
 * having a PGPROC and a wait semaphore!  But these structures are touched
66 67
 * relatively infrequently (only at backend startup or shutdown) and not for
 * very long, so a spinlock is okay.
68
 */
69
static slock_t *ProcStructLock = NULL;
70 71 72

static PROC_HDR *ProcGlobal = NULL;

J
Jan Wieck 已提交
73
static PGPROC *DummyProc = NULL;
74

75
static bool waitingForLock = false;
76
static bool waitingForSignal = false;
77 78

static void ProcKill(void);
79
static void DummyProcKill(void);
80

V
Vadim B. Mikheev 已提交
81

82 83 84 85 86 87 88 89 90 91
/*
 * ProcGlobalSemas
 *		Tell the caller how many semaphores InitProcGlobal needs for the
 *		given number of backends.
 */
int
ProcGlobalSemas(int maxBackends)
{
	/* the dummy process needs a semaphore of its own ... */
	int			dummySemas = 1;

	/* ... in addition to one semaphore per backend */
	return dummySemas + maxBackends;
}

92 93
/*
 * InitProcGlobal -
94
 *	  initializes the global process table. We put it here so that
95
 *	  the postmaster can do this initialization.
96
 *
97
 *	  We also create all the per-process semaphores we will need to support
98 99 100 101 102 103 104 105 106
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxBackends higher than his kernel will support, he'll find out sooner
 *	  rather than later.
107 108 109 110
 *
 *	  Another reason for creating semaphores here is that the semaphore
 *	  implementation typically requires us to create semaphores in the
 *	  postmaster, not in backends.
111 112
 */
void
113
InitProcGlobal(int maxBackends)
114
{
115
	bool		found = false;
116

117
	/* Create or attach to the ProcGlobal shared structure */
118
	ProcGlobal = (PROC_HDR *)
119
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
120

121 122
	/* --------------------
	 * We're the first - initialize.
123 124
	 * XXX if found should ever be true, it is a sign of impending doom ...
	 * ought to complain if so?
125 126 127
	 * --------------------
	 */
	if (!found)
128
	{
129
		int			i;
130

131
		ProcGlobal->freeProcs = INVALID_OFFSET;
132

B
Bruce Momjian 已提交
133
		/*
J
Jan Wieck 已提交
134
		 * Pre-create the PGPROC structures and create a semaphore for each.
135
		 */
136
		for (i = 0; i < maxBackends; i++)
137
		{
J
Jan Wieck 已提交
138
			PGPROC   *proc;
139

J
Jan Wieck 已提交
140
			proc = (PGPROC *) ShmemAlloc(sizeof(PGPROC));
141 142
			if (!proc)
				elog(FATAL, "cannot create new proc: out of memory");
J
Jan Wieck 已提交
143
			MemSet(proc, 0, sizeof(PGPROC));
144 145 146
			PGSemaphoreCreate(&proc->sem);
			proc->links.next = ProcGlobal->freeProcs;
			ProcGlobal->freeProcs = MAKE_OFFSET(proc);
147
		}
148 149

		/*
J
Jan Wieck 已提交
150
		 * Pre-allocate a PGPROC structure for dummy (checkpoint) processes,
151
		 * too.  This does not get linked into the freeProcs list.
152
		 */
J
Jan Wieck 已提交
153
		DummyProc = (PGPROC *) ShmemAlloc(sizeof(PGPROC));
154 155
		if (!DummyProc)
			elog(FATAL, "cannot create new proc: out of memory");
J
Jan Wieck 已提交
156
		MemSet(DummyProc, 0, sizeof(PGPROC));
157
		DummyProc->pid = 0;		/* marks DummyProc as not in use */
158
		PGSemaphoreCreate(&DummyProc->sem);
159 160 161 162

		/* Create ProcStructLock spinlock, too */
		ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
		SpinLockInit(ProcStructLock);
163 164 165
	}
}

166
/*
167
 * InitProcess -- initialize a per-process data structure for this backend
168 169
 */
void
170
InitProcess(void)
171
{
172
	SHMEM_OFFSET myOffset;
173 174
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
175 176

	/*
177 178
	 * ProcGlobal should be set by a previous call to InitProcGlobal (if
	 * we are a backend, we inherit this by fork() from the postmaster).
179
	 */
180
	if (procglobal == NULL)
181
		elog(PANIC, "InitProcess: Proc Header uninitialized");
182 183 184

	if (MyProc != NULL)
		elog(ERROR, "InitProcess: you already exist");
185

186
	/*
187
	 * Try to get a proc struct from the free list.  If this fails,
J
Jan Wieck 已提交
188
	 * we must be out of PGPROC structures (not to mention semaphores).
189
	 */
190
	SpinLockAcquire(ProcStructLock);
191

192
	myOffset = procglobal->freeProcs;
193 194

	if (myOffset != INVALID_OFFSET)
195
	{
J
Jan Wieck 已提交
196
		MyProc = (PGPROC *) MAKE_PTR(myOffset);
197
		procglobal->freeProcs = MyProc->links.next;
198
		SpinLockRelease(ProcStructLock);
199 200 201 202
	}
	else
	{
		/*
J
Jan Wieck 已提交
203
		 * If we reach here, all the PGPROCs are in use.  This is one of
204 205
		 * the possible places to detect "too many backends", so give the
		 * standard error message.
206
		 */
207
		SpinLockRelease(ProcStructLock);
208
		elog(FATAL, "Sorry, too many clients already");
209
	}
210

211
	/*
212 213
	 * Initialize all fields of MyProc, except for the semaphore which
	 * was prepared for us by InitProcGlobal.
214
	 */
215
	SHMQueueElemInit(&(MyProc->links));
216
	MyProc->errType = STATUS_OK;
217
	MyProc->xid = InvalidTransactionId;
218
	MyProc->xmin = InvalidTransactionId;
219 220
	MyProc->pid = MyProcPid;
	MyProc->databaseId = MyDatabaseId;
221
	MyProc->logRec.xrecoff = 0;
222 223 224
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
225 226 227
	MyProc->waitLock = NULL;
	MyProc->waitHolder = NULL;
	SHMQueueInit(&(MyProc->procHolders));
228

229
	/*
230
	 * Arrange to clean up at backend exit.
231
	 */
232
	on_shmem_exit(ProcKill, 0);
233

234
	/*
235
	 * We might be reusing a semaphore that belonged to a failed process.
236 237
	 * So be careful and reinitialize its value here.
	 */
238
	PGSemaphoreReset(&MyProc->sem);
239

240
	/*
J
Jan Wieck 已提交
241
	 * Now that we have a PGPROC, we could try to acquire locks, so
B
Bruce Momjian 已提交
242
	 * initialize the deadlock checker.
243 244
	 */
	InitDeadLockChecking();
245 246
}

247 248 249 250
/*
 * InitDummyProcess -- create a dummy per-process data structure
 *
 * This is called by checkpoint processes so that they will have a MyProc
J
Jan Wieck 已提交
251
 * value that's real enough to let them wait for LWLocks.  The PGPROC and
252 253 254 255 256 257
 * sema that are assigned are the extra ones created during InitProcGlobal.
 */
void
InitDummyProcess(void)
{
	/*
258 259
	 * ProcGlobal should be set by a previous call to InitProcGlobal (we
	 * inherit this by fork() from the postmaster).
260 261
	 */
	if (ProcGlobal == NULL || DummyProc == NULL)
262
		elog(PANIC, "InitDummyProcess: Proc Header uninitialized");
263 264 265 266 267 268 269 270 271 272 273 274 275

	if (MyProc != NULL)
		elog(ERROR, "InitDummyProcess: you already exist");

	/*
	 * DummyProc should not presently be in use by anyone else
	 */
	if (DummyProc->pid != 0)
		elog(FATAL, "InitDummyProcess: DummyProc is in use by PID %d",
			 DummyProc->pid);
	MyProc = DummyProc;

	/*
276 277
	 * Initialize all fields of MyProc, except MyProc->sem which was set
	 * up by InitProcGlobal.
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
	 */
	MyProc->pid = MyProcPid;	/* marks DummyProc as in use by me */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->errType = STATUS_OK;
	MyProc->xid = InvalidTransactionId;
	MyProc->xmin = InvalidTransactionId;
	MyProc->databaseId = MyDatabaseId;
	MyProc->logRec.xrecoff = 0;
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
	MyProc->waitLock = NULL;
	MyProc->waitHolder = NULL;
	SHMQueueInit(&(MyProc->procHolders));

	/*
	 * Arrange to clean up at process exit.
	 */
	on_shmem_exit(DummyProcKill, 0);

	/*
	 * We might be reusing a semaphore that belonged to a failed process.
	 * So be careful and reinitialize its value here.
	 */
302
	PGSemaphoreReset(&MyProc->sem);
303 304
}

305 306 307
/*
 * Cancel any pending wait for lock, when aborting a transaction.
 *
308 309
 * Returns true if we had been waiting for a lock, else false.
 *
310 311 312 313
 * (Normally, this would only happen if we accept a cancel/die
 * interrupt while waiting; but an elog(ERROR) while waiting is
 * within the realm of possibility, too.)
 */
314
bool
315 316 317 318
LockWaitCancel(void)
{
	/* Nothing to do if we weren't waiting for a lock */
	if (!waitingForLock)
319 320
		return false;

321 322 323
	waitingForLock = false;

	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
324
	disable_sig_alarm(false);
325 326

	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
327
	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
328 329
	if (MyProc->links.next != INVALID_OFFSET)
		RemoveFromWaitQueue(MyProc);
330
	LWLockRelease(LockMgrLock);
H
Hiroshi Inoue 已提交
331

332 333 334
	/*
	 * Reset the proc wait semaphore to zero.  This is necessary in the
	 * scenario where someone else granted us the lock we wanted before we
B
Bruce Momjian 已提交
335 336 337 338 339
	 * were able to remove ourselves from the wait-list.  The semaphore
	 * will have been bumped to 1 by the would-be grantor, and since we
	 * are no longer going to wait on the sema, we have to force it back
	 * to zero. Otherwise, our next attempt to wait for a lock will fall
	 * through prematurely.
340
	 */
341
	PGSemaphoreReset(&MyProc->sem);
342 343

	/*
B
Bruce Momjian 已提交
344 345
	 * Return true even if we were kicked off the lock before we were able
	 * to remove ourselves.
346 347
	 */
	return true;
H
Hiroshi Inoue 已提交
348
}
349

350

351
/*
352 353 354 355 356 357 358 359
 * ProcReleaseLocks() -- release locks associated with current transaction
 *			at transaction commit or abort
 *
 * At commit, we release only locks tagged with the current transaction's XID,
 * leaving those marked with XID 0 (ie, session locks) undisturbed.  At abort,
 * we release all locks including XID 0, because we need to clean up after
 * a failure.  This logic will need extension if we ever support nested
 * transactions.
360
 *
361
 * Note that user locks are not released in either case.
362 363
 */
void
364
ProcReleaseLocks(bool isCommit)
365
{
366 367
	if (!MyProc)
		return;
368 369 370
	/* If waiting, get off wait queue (should only be needed after error) */
	LockWaitCancel();
	/* Release locks */
371 372
	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc,
				   !isCommit, GetCurrentTransactionId());
373 374 375 376 377
}


/*
 * ProcKill() -- Destroy the per-proc data structure for
378
 *		this process. Release any of its held LW locks.
379 380
 */
static void
381
ProcKill(void)
382
{
383 384 385
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

386
	Assert(MyProc != NULL);
387

388 389 390 391 392
	/* Release any LW locks I am holding */
	LWLockReleaseAll();

	/* Abort any buffer I/O in progress */
	AbortBufferIO();
393

394 395
	/* Get off any wait queue I might be on */
	LockWaitCancel();
396

397
	/* Remove from the standard lock table */
398
	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, true, InvalidTransactionId);
399

400 401
#ifdef USER_LOCKS
	/* Remove from the user lock table */
402
	LockReleaseAll(USER_LOCKMETHOD, MyProc, true, InvalidTransactionId);
403
#endif
404

405
	SpinLockAcquire(ProcStructLock);
406

J
Jan Wieck 已提交
407
	/* Return PGPROC structure (and semaphore) to freelist */
408 409
	MyProc->links.next = procglobal->freeProcs;
	procglobal->freeProcs = MAKE_OFFSET(MyProc);
410

J
Jan Wieck 已提交
411
	/* PGPROC struct isn't mine anymore */
412
	MyProc = NULL;
413

414 415 416 417 418
	SpinLockRelease(ProcStructLock);
}

/*
 * DummyProcKill() -- Cut-down version of ProcKill for dummy (checkpoint)
 *		processes.	The PGPROC and sema are not released, only marked
 *		as not-in-use.
 *
 * Registered via on_shmem_exit() by InitDummyProcess.
 */
static void
DummyProcKill(void)
{
	Assert(MyProc != NULL && MyProc == DummyProc);

	/* Release any LW locks I am holding */
	LWLockReleaseAll();

	/* Abort any buffer I/O in progress */
	AbortBufferIO();

	/* I can't be on regular lock queues, so needn't check */

	/* Mark DummyProc no longer in use */
	MyProc->pid = 0;

	/* PGPROC struct isn't mine anymore */
	MyProc = NULL;
}

442

443 444
/*
 * ProcQueue package: routines for putting processes to sleep
 *		and waking them up
 */

/*
 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
 *
 * Returns: a pointer to the queue or NULL
 * Side Effects: Initializes the queue if we allocated one
 *
 * Compiled only when NOT_USED is defined.
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(char *name)
{
	bool		found;
	PROC_QUEUE *queue = (PROC_QUEUE *)
		ShmemInitStruct(name, sizeof(PROC_QUEUE), &found);

	if (!queue)
		return NULL;

	/* Only the creator initializes; attaching to an existing queue doesn't */
	if (!found)
		ProcQueueInit(queue);
	return queue;
}
#endif
469 470 471 472 473

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
474
ProcQueueInit(PROC_QUEUE *queue)
475
{
476 477
	SHMQueueInit(&(queue->links));
	queue->size = 0;
478 479 480 481 482 483
}


/*
 * ProcSleep -- put a process to sleep
 *
484 485
 * Caller must have set MyProc->heldLocks to reflect locks already held
 * on the lockable object by this process (under all XIDs).
486
 *
487
 * Locktable's masterLock must be held at entry, and will be held
488
 * at exit.
489
 *
490
 * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
491
 *
492
 * ASSUME: that no one will fiddle with the queue until after
493
 *		we release the masterLock.
494 495
 *
 * NOTES: The process queue is now a priority queue for locking.
496 497 498
 *
 * P() on the semaphore should put us to sleep.  The process
 * semaphore is normally zero, so when we try to acquire it, we sleep.
499 500
 */
int
501
ProcSleep(LOCKMETHODTABLE *lockMethodTable,
502 503
		  LOCKMODE lockmode,
		  LOCK *lock,
B
Bruce Momjian 已提交
504
		  PROCLOCK *holder)
505
{
B
Bruce Momjian 已提交
506
	LWLockId	masterLock = lockMethodTable->masterLock;
507 508
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			myHeldLocks = MyProc->heldLocks;
509
	bool		early_deadlock = false;
J
Jan Wieck 已提交
510
	PGPROC	   *proc;
511
	int			i;
512

513
	/*
514 515 516 517 518 519
	 * Determine where to add myself in the wait queue.
	 *
	 * Normally I should go at the end of the queue.  However, if I already
	 * hold locks that conflict with the request of any previous waiter,
	 * put myself in the queue just in front of the first such waiter.
	 * This is not a necessary step, since deadlock detection would move
520 521 522
	 * me to before that waiter anyway; but it's relatively cheap to
	 * detect such a conflict immediately, and avoid delaying till
	 * deadlock timeout.
523
	 *
524 525
	 * Special case: if I find I should go in front of some waiter, check to
	 * see if I conflict with already-held locks or the requests before
526 527
	 * that waiter.  If not, then just grant myself the requested lock
	 * immediately.  This is the same as the test for immediate grant in
528 529
	 * LockAcquire, except we are only considering the part of the wait
	 * queue before my insertion point.
530 531
	 */
	if (myHeldLocks != 0)
V
Vadim B. Mikheev 已提交
532
	{
533 534
		int			aheadRequests = 0;

J
Jan Wieck 已提交
535
		proc = (PGPROC *) MAKE_PTR(waitQueue->links.next);
536
		for (i = 0; i < waitQueue->size; i++)
V
Vadim B. Mikheev 已提交
537
		{
538
			/* Must he wait for me? */
B
Bruce Momjian 已提交
539
			if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
V
Vadim B. Mikheev 已提交
540
			{
541
				/* Must I wait for him ? */
B
Bruce Momjian 已提交
542
				if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks)
543
				{
544
					/*
545 546 547 548 549
					 * Yes, so we have a deadlock.	Easiest way to clean
					 * up correctly is to call RemoveFromWaitQueue(), but
					 * we can't do that until we are *on* the wait queue.
					 * So, set a flag to check below, and break out of
					 * loop.
550 551 552
					 */
					early_deadlock = true;
					break;
553
				}
554
				/* I must go before this waiter.  Check special case. */
B
Bruce Momjian 已提交
555
				if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
556 557 558 559 560 561
					LockCheckConflicts(lockMethodTable,
									   lockmode,
									   lock,
									   holder,
									   MyProc,
									   NULL) == STATUS_OK)
562
				{
563 564 565
					/* Skip the wait and just grant myself the lock. */
					GrantLock(lock, holder, lockmode);
					return STATUS_OK;
566 567
				}
				/* Break out of loop to put myself before him */
V
Vadim B. Mikheev 已提交
568
				break;
569
			}
570 571
			/* Nope, so advance to next waiter */
			aheadRequests |= (1 << proc->waitLockMode);
J
Jan Wieck 已提交
572
			proc = (PGPROC *) MAKE_PTR(proc->links.next);
V
Vadim B. Mikheev 已提交
573
		}
B
Bruce Momjian 已提交
574

575 576 577 578
		/*
		 * If we fall out of loop normally, proc points to waitQueue head,
		 * so we will insert at tail of queue as desired.
		 */
579 580 581 582
	}
	else
	{
		/* I hold no locks, so I can't push in front of anyone. */
J
Jan Wieck 已提交
583
		proc = (PGPROC *) &(waitQueue->links);
V
Vadim B. Mikheev 已提交
584
	}
585

586 587 588
	/*
	 * Insert self into queue, ahead of the given proc (or at tail of
	 * queue).
589
	 */
590
	SHMQueueInsertBefore(&(proc->links), &(MyProc->links));
B
Bruce Momjian 已提交
591
	waitQueue->size++;
592

593
	lock->waitMask |= (1 << lockmode);
594

J
Jan Wieck 已提交
595
	/* Set up wait information in PGPROC object, too */
596 597 598 599
	MyProc->waitLock = lock;
	MyProc->waitHolder = holder;
	MyProc->waitLockMode = lockmode;

600
	MyProc->errType = STATUS_OK;	/* initialize result for success */
601 602 603

	/*
	 * If we detected deadlock, give up without waiting.  This must agree
604
	 * with CheckDeadLock's recovery code, except that we shouldn't
605
	 * release the semaphore since we haven't tried to lock it yet.
606 607 608 609 610 611 612
	 */
	if (early_deadlock)
	{
		RemoveFromWaitQueue(MyProc);
		MyProc->errType = STATUS_ERROR;
		return STATUS_ERROR;
	}
613

614 615 616
	/* mark that we are waiting for a lock */
	waitingForLock = true;

617
	/*
618
	 * Release the locktable's masterLock.
619
	 *
620 621 622 623
	 * NOTE: this may also cause us to exit critical-section state, possibly
	 * allowing a cancel/die interrupt to be accepted. This is OK because
	 * we have recorded the fact that we are waiting for a lock, and so
	 * LockWaitCancel will clean up if cancel/die happens.
624
	 */
625
	LWLockRelease(masterLock);
626

627
	/*
628 629 630 631 632 633 634
	 * Set timer so we can wake up after awhile and check for a deadlock.
	 * If a deadlock is detected, the handler releases the process's
	 * semaphore and sets MyProc->errType = STATUS_ERROR, allowing us to
	 * know that we must report failure rather than success.
	 *
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
635
	 */
636
	if (!enable_sig_alarm(DeadlockTimeout, false))
637
		elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
638

639
	/*
640 641
	 * If someone wakes us between LWLockRelease and PGSemaphoreLock,
	 * PGSemaphoreLock will not block.  The wakeup is "saved" by the
642
	 * semaphore implementation.  Note also that if CheckDeadLock is
643
	 * invoked but does not detect a deadlock, PGSemaphoreLock() will
644 645
	 * continue to wait.  There used to be a loop here, but it was useless
	 * code...
646 647 648 649 650 651
	 *
	 * We pass interruptOK = true, which eliminates a window in which
	 * cancel/die interrupts would be held off undesirably.  This is a
	 * promise that we don't mind losing control to a cancel/die interrupt
	 * here.  We don't, because we have no state-change work to do after
	 * being granted the lock (the grantor did it all).
652
	 */
653
	PGSemaphoreLock(&MyProc->sem, true);
654

655
	/*
656
	 * Disable the timer, if it's still running
B
Bruce Momjian 已提交
657
	 */
658
	if (!disable_sig_alarm(false))
659
		elog(FATAL, "ProcSleep: Unable to disable timer for process wakeup");
B
Bruce Momjian 已提交
660

661 662 663 664 665
	/*
	 * Now there is nothing for LockWaitCancel to do.
	 */
	waitingForLock = false;

666
	/*
667
	 * Re-acquire the locktable's masterLock.
668
	 */
669
	LWLockAcquire(masterLock, LW_EXCLUSIVE);
670

671 672 673 674
	/*
	 * We don't have to do anything else, because the awaker did all the
	 * necessary update of the lock table and MyProc.
	 */
675
	return MyProc->errType;
676 677 678 679 680 681
}


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
682
 *	 Also remove the process from the wait queue and set its links invalid.
683
 *	 RETURN: the next process in the wait queue.
684 685 686 687
 *
 * XXX: presently, this code is only used for the "success" case, and only
 * works correctly for that case.  To clean up in failure case, would need
 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
688
 */
J
Jan Wieck 已提交
689 690
PGPROC *
ProcWakeup(PGPROC *proc, int errType)
691
{
J
Jan Wieck 已提交
692
	PGPROC	   *retProc;
693

694
	/* assume that masterLock has been acquired */
695

696
	/* Proc should be sleeping ... */
697 698
	if (proc->links.prev == INVALID_OFFSET ||
		proc->links.next == INVALID_OFFSET)
J
Jan Wieck 已提交
699
		return (PGPROC *) NULL;
700

701
	/* Save next process before we zap the list link */
J
Jan Wieck 已提交
702
	retProc = (PGPROC *) MAKE_PTR(proc->links.next);
703

704
	/* Remove process from wait queue */
705
	SHMQueueDelete(&(proc->links));
706
	(proc->waitLock->waitProcs.size)--;
707

708 709 710
	/* Clean up process' state and pass it the ok/fail signal */
	proc->waitLock = NULL;
	proc->waitHolder = NULL;
711 712
	proc->errType = errType;

713
	/* And awaken it */
714
	PGSemaphoreUnlock(&proc->sem);
715 716

	return retProc;
717 718 719 720
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
721 722
 *		released (or a prior waiter is aborted).  Scan all waiters
 *		for lock, waken any that are no longer blocked.
723
 */
724 725
void
ProcLockWakeup(LOCKMETHODTABLE *lockMethodTable, LOCK *lock)
726
{
727 728
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
J
Jan Wieck 已提交
729
	PGPROC	   *proc;
730
	int			aheadRequests = 0;
M
 
Marc G. Fournier 已提交
731

732
	Assert(queue_size >= 0);
733

734 735
	if (queue_size == 0)
		return;
736

J
Jan Wieck 已提交
737
	proc = (PGPROC *) MAKE_PTR(waitQueue->links.next);
738

739 740
	while (queue_size-- > 0)
	{
B
Bruce Momjian 已提交
741
		LOCKMODE	lockmode = proc->waitLockMode;
M
 
Marc G. Fournier 已提交
742 743

		/*
744 745
		 * Waken if (a) doesn't conflict with requests of earlier waiters,
		 * and (b) doesn't conflict with already-held locks.
M
 
Marc G. Fournier 已提交
746
		 */
B
Bruce Momjian 已提交
747
		if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
748 749 750 751 752 753
			LockCheckConflicts(lockMethodTable,
							   lockmode,
							   lock,
							   proc->waitHolder,
							   proc,
							   NULL) == STATUS_OK)
M
 
Marc G. Fournier 已提交
754
		{
755 756 757
			/* OK to waken */
			GrantLock(lock, proc->waitHolder, lockmode);
			proc = ProcWakeup(proc, STATUS_OK);
B
Bruce Momjian 已提交
758

759
			/*
B
Bruce Momjian 已提交
760 761 762
			 * ProcWakeup removes proc from the lock's waiting process
			 * queue and returns the next proc in chain; don't use proc's
			 * next-link, because it's been cleared.
763
			 */
M
 
Marc G. Fournier 已提交
764
		}
765
		else
766
		{
B
Bruce Momjian 已提交
767 768 769 770
			/*
			 * Cannot wake this guy. Remember his request for later
			 * checks.
			 */
771
			aheadRequests |= (1 << lockmode);
J
Jan Wieck 已提交
772
			proc = (PGPROC *) MAKE_PTR(proc->links.next);
773
		}
M
 
Marc G. Fournier 已提交
774
	}
775 776

	Assert(waitQueue->size >= 0);
777 778 779
}

/* --------------------
780
 * We only get to this routine if we got SIGALRM after DeadlockTimeout
781 782 783 784
 * while waiting for a lock to be released by some other process.  Look
 * to see if there's a deadlock; if not, just return and continue waiting.
 * If we have a real deadlock, remove ourselves from the lock's wait queue
 * and signal an error to ProcSleep.
785 786
 * --------------------
 */
787
void
788
CheckDeadLock(void)
789
{
790
	int			save_errno = errno;
791

792
	/*
B
Bruce Momjian 已提交
793 794
	 * Acquire locktable lock.	Note that the SIGALRM interrupt had better
	 * not be enabled anywhere that this process itself holds the
795
	 * locktable lock, else this will wait forever.  Also note that
796 797
	 * LWLockAcquire creates a critical section, so that this routine
	 * cannot be interrupted by cancel/die interrupts.
798
	 */
799
	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
800

801
	/*
802 803 804
	 * Check to see if we've been awoken by anyone in the interim.
	 *
	 * If we have we can return and resume our transaction -- happy day.
805 806
	 * Before we are awoken the process releasing the lock grants it to us
	 * so we know that we don't have to wait anymore.
807
	 *
808
	 * We check by looking to see if we've been unlinked from the wait queue.
809 810 811 812
	 * This is quicker than checking our semaphore's state, since no
	 * kernel call is needed, and it is safe because we hold the locktable
	 * lock.
	 *
813 814 815 816
	 */
	if (MyProc->links.prev == INVALID_OFFSET ||
		MyProc->links.next == INVALID_OFFSET)
	{
817
		LWLockRelease(LockMgrLock);
818
		errno = save_errno;
819 820 821
		return;
	}

822
#ifdef LOCK_DEBUG
B
Bruce Momjian 已提交
823 824
	if (Debug_deadlocks)
		DumpAllLocks();
825 826
#endif

827
	if (!DeadLockCheck(MyProc))
B
Bruce Momjian 已提交
828
	{
829
		/* No deadlock, so keep waiting */
830
		LWLockRelease(LockMgrLock);
831
		errno = save_errno;
B
Bruce Momjian 已提交
832 833 834
		return;
	}

835
	/*
836 837 838
	 * Oops.  We have a deadlock.
	 *
	 * Get this process out of wait state.
839
	 */
840 841
	RemoveFromWaitQueue(MyProc);

842 843 844
	/*
	 * Set MyProc->errType to STATUS_ERROR so that ProcSleep will report
	 * an error after we return from this signal handler.
845 846
	 */
	MyProc->errType = STATUS_ERROR;
847

848 849 850
	/*
	 * Unlock my semaphore so that the interrupted ProcSleep() call can
	 * finish.
851
	 */
852
	PGSemaphoreUnlock(&MyProc->sem);
853

854 855 856 857 858 859 860 861 862
	/*
	 * We're done here.  Transaction abort caused by the error that
	 * ProcSleep will raise will cause any other locks we hold to be
	 * released, thus allowing other processes to wake up; we don't need
	 * to do that here. NOTE: an exception is that releasing locks we hold
	 * doesn't consider the possibility of waiters that were blocked
	 * behind us on the lock we just failed to get, and might now be
	 * wakable because we're not in front of them anymore.  However,
	 * RemoveFromWaitQueue took care of waking up any such processes.
863
	 */
864
	LWLockRelease(LockMgrLock);
865
	errno = save_errno;
866 867 868
}


869 870 871 872 873 874 875 876 877 878 879 880
/*
 * ProcWaitForSignal - wait for a signal from another backend.
 *
 * This can share the semaphore normally used for waiting for locks,
 * since a backend could never be waiting for a lock and a signal at
 * the same time.  As with locks, it's OK if the signal arrives just
 * before we actually reach the waiting state.
 */
void
ProcWaitForSignal(void)
{
	waitingForSignal = true;
881
	PGSemaphoreLock(&MyProc->sem, true);
882 883 884 885 886 887 888 889 890 891 892 893 894
	waitingForSignal = false;
}

/*
 * ProcCancelWaitForSignal - clean up an aborted wait for signal
 *
 * We need this in case the signal arrived after we aborted waiting,
 * or if it arrived but we never reached ProcWaitForSignal() at all.
 * Caller should call this after resetting the signal request status.
 */
void
ProcCancelWaitForSignal(void)
{
895
	PGSemaphoreReset(&MyProc->sem);
896 897 898 899 900 901 902 903 904
	waitingForSignal = false;
}

/*
 * ProcSendSignal - send a signal to a backend identified by BackendId
 *
 * The "signal" is an unlock of the target's process semaphore.  Nothing
 * happens if BackendIdGetProc() returns NULL for the given id.
 */
void
ProcSendSignal(BackendId procId)
{
	PGPROC	   *proc = BackendIdGetProc(procId);

	if (proc != NULL)
		PGSemaphoreUnlock(&proc->sem);
}


912 913 914 915 916 917 918 919 920
/*****************************************************************************
 * SIGALRM interrupt support
 *
 * Maybe these should be in pqsignal.c?
 *****************************************************************************/

/*
 * Enable the SIGALRM interrupt to fire after the specified delay
 *
921
 * Delay is given in milliseconds.	Caller should be sure a SIGALRM
922 923
 * signal handler is installed before this is called.
 *
924 925 926
 * This code properly handles multiple alarms when the statement_timeout
 * alarm is specified first.
 *
927 928 929
 * Returns TRUE if okay, FALSE on failure.
 */
bool
930
enable_sig_alarm(int delayms, bool is_statement_timeout)
931 932
{
#ifndef __BEOS__
933 934 935 936
	struct itimerval timeval, remaining;
#else
	bigtime_t	time_interval, remaining;
#endif
937

938 939 940 941 942 943 944
	/* Don't set timer if the statement timeout scheduled before next alarm. */
	if (alarm_is_statement_timeout &&
		!is_statement_timeout &&
		RemainingStatementTimeout <= delayms)
		return true;

#ifndef __BEOS__
945 946 947
	MemSet(&timeval, 0, sizeof(struct itimerval));
	timeval.it_value.tv_sec = delayms / 1000;
	timeval.it_value.tv_usec = (delayms % 1000) * 1000;
948
	if (setitimer(ITIMER_REAL, &timeval, &remaining))
949 950 951
		return false;
#else
	/* BeOS doesn't have setitimer, but has set_alarm */
952
	time_interval = delayms * 1000;		/* usecs */
953
	if ((remaining = set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM)) < 0)
954 955 956
		return false;
#endif

957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007
	if (is_statement_timeout)
		RemainingStatementTimeout = StatementTimeout;
	else
	{
		/* Switching to non-statement-timeout alarm, get remaining time */
		if (alarm_is_statement_timeout)
		{
#ifndef __BEOS__
			/* We lose precision here because we convert to milliseconds */
			RemainingStatementTimeout = remaining.it_value.tv_sec * 1000 +
										remaining.it_value.tv_usec / 1000;
#else
			RemainingStatementTimeout = remaining / 1000;
#endif
			/* Rounding could cause a zero */
			if (RemainingStatementTimeout == 0)
				RemainingStatementTimeout = 1;
		}

		if (RemainingStatementTimeout)
		{
			/* Remaining timeout alarm < delayms? */
			if (RemainingStatementTimeout <= delayms)
			{
				/* reinstall statement timeout alarm */
				alarm_is_statement_timeout = true;
#ifndef __BEOS__
				remaining.it_value.tv_sec = RemainingStatementTimeout / 1000;
				remaining.it_value.tv_usec = (RemainingStatementTimeout % 1000) * 1000;
			 	if (setitimer(ITIMER_REAL, &remaining, &timeval))
					return false;
				else
					return true;
#else
				remaining = RemainingStatementTimeout * 1000;
				if ((timeval = set_alarm(remaining, B_ONE_SHOT_RELATIVE_ALARM)) < 0)
					return false;
				else
					return true;
#endif
			}
			else
				RemainingStatementTimeout -= delayms;
		}
	}

	if (is_statement_timeout)
		alarm_is_statement_timeout = true;
	else
		alarm_is_statement_timeout = false;

1008 1009 1010 1011
	return true;
}

/*
1012 1013 1014 1015
 * Cancel the SIGALRM timer.
 *
 * This is also called if the timer has fired to reschedule
 * the statement_timeout timer.
1016 1017 1018 1019
 *
 * Returns TRUE if okay, FALSE on failure.
 */
bool
1020
disable_sig_alarm(bool is_statement_timeout)
1021 1022
{
#ifndef __BEOS__
1023
	struct itimerval timeval, remaining;
1024 1025
	MemSet(&timeval, 0, sizeof(struct itimerval));
#else
1026
	bigtime_t time_interval = 0;
1027 1028
#endif

1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076
	if (!is_statement_timeout && RemainingStatementTimeout)
	{
#ifndef __BEOS__
		/* turn off timer and get remaining time, if any */
		if (setitimer(ITIMER_REAL, &timeval, &remaining))
			return false;
		/* Add remaining time back because the timer didn't complete */
		RemainingStatementTimeout += remaining.it_value.tv_sec * 1000 +
									 remaining.it_value.tv_usec / 1000;
		/* Prepare to set timer */
		timeval.it_value.tv_sec = RemainingStatementTimeout / 1000;
		timeval.it_value.tv_usec = (RemainingStatementTimeout % 1000) * 1000;
#else
		/* BeOS doesn't have setitimer, but has set_alarm */
		if ((time_interval = set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM)) < 0)
			return false;
		RemainingStatementTimeout += time_interval / 1000;
		time_interval = RemainingStatementTimeout * 1000;
#endif
		/* Restore remaining statement timeout value */
		alarm_is_statement_timeout = true;
	}
	/*
	 *	Optimization: is_statement_timeout && RemainingStatementTimeout == 0
	 *  does nothing.  This is for cases where no timeout was set.
	 */
	if (!is_statement_timeout || RemainingStatementTimeout)
	{
#ifndef __BEOS__
		if (setitimer(ITIMER_REAL, &timeval, &remaining))
			return false;
#else
		if (time_interval)
		{
			if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0)
				return false;
		}
		else
		{
			if (set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM) < 0)
				return false;
		}
#endif
	}

	if (is_statement_timeout)
		RemainingStatementTimeout = 0;

1077 1078
	return true;
}
1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100


/*
 * Call alarm handler, either StatementCancel or Deadlock checker.
 *
 * If the pending alarm was the statement timeout, the timeout bookkeeping is
 * cleared and SIGINT is sent to our own process.  Otherwise the deadlock
 * checker is run and any outstanding statement timeout is re-armed.
 *
 * errno is saved on entry and restored on exit, because the calls made here
 * may change it and the interrupted code may be about to inspect it.
 */
void
handle_sig_alarm(SIGNAL_ARGS)
{
	int			save_errno = errno;

	if (alarm_is_statement_timeout)
	{
		RemainingStatementTimeout = 0;
		alarm_is_statement_timeout = false;
		kill(MyProcPid, SIGINT);
	}
	else
	{
		CheckDeadLock();
		/* Reactivate any statement_timeout alarm. */
		disable_sig_alarm(false);
	}

	errno = save_errno;
}