/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.131 2003/07/24 22:04:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * Interface (a):
 *		ProcSleep(), ProcWakeup(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocating memory
 *
 * Locking and waiting for buffers can cause the backend to be
 * put to sleep.  Whoever releases the lock, etc. wakes the
 * process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with current transaction
 *
 * ProcKill -- destroys the shared memory state (and locks)
 *		associated with the process.
 *
 * 5/15/91 -- removed the buffer pool based lock chain in favor
 *		of a shared memory lock chain.	The write-protection is
 *		more expensive if the lock chain is in the buffer pool.
 *		The only reason I kept the lock chain in the buffer pool
 *		in the first place was to allow the lock table to grow larger
 *		than available shared memory and that isn't going to work
 *		without a lot of unimplemented support anyway.
 */
41 42
#include "postgres.h"

43
#include <errno.h>
44
#include <signal.h>
45 46
#include <unistd.h>
#include <sys/time.h>
M
Marc G. Fournier 已提交
47

48
#include "miscadmin.h"
49
#include "access/xact.h"
50
#include "storage/ipc.h"
51
#include "storage/proc.h"
52
#include "storage/sinval.h"
53
#include "storage/spin.h"
54

55
/* GUC variables */

/*
 * Delay before the deadlock check runs while waiting for a lock.  ProcSleep
 * passes this to enable_sig_alarm(), which takes its delay in milliseconds.
 */
int			DeadlockTimeout = 1000;
/* NOTE(review): presumably also milliseconds, with 0 meaning disabled -- confirm */
int			StatementTimeout = 0;

/* Pointer to this process's PGPROC struct, if any */
PGPROC	   *MyProc = NULL;

/*
 * This spinlock protects the freelist of recycled PGPROC structures.
 * We cannot use an LWLock because the LWLock manager depends on already
 * having a PGPROC and a wait semaphore!  But these structures are touched
 * relatively infrequently (only at backend startup or shutdown) and not for
 * very long, so a spinlock is okay.
 */
static slock_t *ProcStructLock = NULL;

/* The shared "Proc Header" structure; set up by InitProcGlobal */
static PROC_HDR *ProcGlobal = NULL;

/* Extra PGPROC reserved for dummy (checkpoint) processes; pid == 0 if free */
static PGPROC *DummyProc = NULL;

/* True while ProcSleep is waiting on a lock; tested by LockWaitCancel */
static bool waitingForLock = false;
/* True while ProcWaitForSignal is blocked on our semaphore */
static bool waitingForSignal = false;

/* Mark these volatile because they can be changed by signal handler */
static volatile bool statement_timeout_active = false;
static volatile bool deadlock_timeout_active = false;
/* statement_fin_time is valid only if statement_timeout_active is true */
static struct timeval statement_fin_time;


/* Forward declarations of file-local routines */
static void ProcKill(void);
static void DummyProcKill(void);
static bool CheckStatementTimeout(void);
88

V
Vadim B. Mikheev 已提交
89

90 91 92 93 94 95 96 97 98 99
/*
 * ProcGlobalSemas -- how many semaphores InitProcGlobal will create.
 *
 * One semaphore is needed for every regular backend slot, and one more
 * for the dummy process.
 */
int
ProcGlobalSemas(int maxBackends)
{
	int			semasNeeded = maxBackends;

	/* account for the dummy process's semaphore */
	semasNeeded += 1;

	return semasNeeded;
}

100 101
/*
 * InitProcGlobal -
102
 *	  initializes the global process table. We put it here so that
103
 *	  the postmaster can do this initialization.
104
 *
105
 *	  We also create all the per-process semaphores we will need to support
106 107 108 109 110 111 112 113 114
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxBackends higher than his kernel will support, he'll find out sooner
 *	  rather than later.
115 116 117 118
 *
 *	  Another reason for creating semaphores here is that the semaphore
 *	  implementation typically requires us to create semaphores in the
 *	  postmaster, not in backends.
119 120
 */
void
121
InitProcGlobal(int maxBackends)
122
{
123
	bool		found = false;
124

125
	/* Create or attach to the ProcGlobal shared structure */
126
	ProcGlobal = (PROC_HDR *)
127
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
128

129 130
	/* --------------------
	 * We're the first - initialize.
131 132
	 * XXX if found should ever be true, it is a sign of impending doom ...
	 * ought to complain if so?
133 134 135
	 * --------------------
	 */
	if (!found)
136
	{
137
		int			i;
138

139
		ProcGlobal->freeProcs = INVALID_OFFSET;
140

B
Bruce Momjian 已提交
141
		/*
B
Bruce Momjian 已提交
142 143
		 * Pre-create the PGPROC structures and create a semaphore for
		 * each.
144
		 */
145
		for (i = 0; i < maxBackends; i++)
146
		{
B
Bruce Momjian 已提交
147
			PGPROC	   *proc;
148

J
Jan Wieck 已提交
149
			proc = (PGPROC *) ShmemAlloc(sizeof(PGPROC));
150
			if (!proc)
151 152 153
				ereport(FATAL,
						(errcode(ERRCODE_OUT_OF_MEMORY),
						 errmsg("out of memory")));
J
Jan Wieck 已提交
154
			MemSet(proc, 0, sizeof(PGPROC));
155 156 157
			PGSemaphoreCreate(&proc->sem);
			proc->links.next = ProcGlobal->freeProcs;
			ProcGlobal->freeProcs = MAKE_OFFSET(proc);
158
		}
159 160

		/*
B
Bruce Momjian 已提交
161 162 163
		 * Pre-allocate a PGPROC structure for dummy (checkpoint)
		 * processes, too.	This does not get linked into the freeProcs
		 * list.
164
		 */
J
Jan Wieck 已提交
165
		DummyProc = (PGPROC *) ShmemAlloc(sizeof(PGPROC));
166
		if (!DummyProc)
167 168 169
			ereport(FATAL,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
J
Jan Wieck 已提交
170
		MemSet(DummyProc, 0, sizeof(PGPROC));
171
		DummyProc->pid = 0;		/* marks DummyProc as not in use */
172
		PGSemaphoreCreate(&DummyProc->sem);
173 174 175 176

		/* Create ProcStructLock spinlock, too */
		ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
		SpinLockInit(ProcStructLock);
177 178 179
	}
}

180
/*
181
 * InitProcess -- initialize a per-process data structure for this backend
182 183
 */
void
184
InitProcess(void)
185
{
186
	SHMEM_OFFSET myOffset;
B
Bruce Momjian 已提交
187

188 189
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
190 191

	/*
192 193
	 * ProcGlobal should be set by a previous call to InitProcGlobal (if
	 * we are a backend, we inherit this by fork() from the postmaster).
194
	 */
195
	if (procglobal == NULL)
196
		elog(PANIC, "proc header uninitialized");
197 198

	if (MyProc != NULL)
199
		elog(ERROR, "you already exist");
200

201
	/*
B
Bruce Momjian 已提交
202 203
	 * Try to get a proc struct from the free list.  If this fails, we
	 * must be out of PGPROC structures (not to mention semaphores).
204
	 */
205
	SpinLockAcquire(ProcStructLock);
206

207
	myOffset = procglobal->freeProcs;
208 209

	if (myOffset != INVALID_OFFSET)
210
	{
J
Jan Wieck 已提交
211
		MyProc = (PGPROC *) MAKE_PTR(myOffset);
212
		procglobal->freeProcs = MyProc->links.next;
213
		SpinLockRelease(ProcStructLock);
214 215 216 217
	}
	else
	{
		/*
J
Jan Wieck 已提交
218
		 * If we reach here, all the PGPROCs are in use.  This is one of
219 220
		 * the possible places to detect "too many backends", so give the
		 * standard error message.
221
		 */
222
		SpinLockRelease(ProcStructLock);
223 224 225
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("sorry, too many clients already")));
226
	}
227

228
	/*
B
Bruce Momjian 已提交
229 230
	 * Initialize all fields of MyProc, except for the semaphore which was
	 * prepared for us by InitProcGlobal.
231
	 */
232
	SHMQueueElemInit(&(MyProc->links));
233
	MyProc->errType = STATUS_OK;
234
	MyProc->xid = InvalidTransactionId;
235
	MyProc->xmin = InvalidTransactionId;
236 237
	MyProc->pid = MyProcPid;
	MyProc->databaseId = MyDatabaseId;
238
	MyProc->logRec.xrecoff = 0;
239 240 241
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
242 243 244
	MyProc->waitLock = NULL;
	MyProc->waitHolder = NULL;
	SHMQueueInit(&(MyProc->procHolders));
245

246
	/*
247
	 * Arrange to clean up at backend exit.
248
	 */
249
	on_shmem_exit(ProcKill, 0);
250

251
	/*
252
	 * We might be reusing a semaphore that belonged to a failed process.
253 254
	 * So be careful and reinitialize its value here.
	 */
255
	PGSemaphoreReset(&MyProc->sem);
256

257
	/*
J
Jan Wieck 已提交
258
	 * Now that we have a PGPROC, we could try to acquire locks, so
B
Bruce Momjian 已提交
259
	 * initialize the deadlock checker.
260 261
	 */
	InitDeadLockChecking();
262 263
}

264 265 266 267
/*
 * InitDummyProcess -- create a dummy per-process data structure
 *
 * This is called by checkpoint processes so that they will have a MyProc
J
Jan Wieck 已提交
268
 * value that's real enough to let them wait for LWLocks.  The PGPROC and
269 270 271 272 273 274
 * sema that are assigned are the extra ones created during InitProcGlobal.
 */
void
InitDummyProcess(void)
{
	/*
275 276
	 * ProcGlobal should be set by a previous call to InitProcGlobal (we
	 * inherit this by fork() from the postmaster).
277 278
	 */
	if (ProcGlobal == NULL || DummyProc == NULL)
279
		elog(PANIC, "proc header uninitialized");
280 281

	if (MyProc != NULL)
282
		elog(ERROR, "you already exist");
283 284 285 286 287

	/*
	 * DummyProc should not presently be in use by anyone else
	 */
	if (DummyProc->pid != 0)
288
		elog(FATAL, "DummyProc is in use by PID %d", DummyProc->pid);
289 290 291
	MyProc = DummyProc;

	/*
292 293
	 * Initialize all fields of MyProc, except MyProc->sem which was set
	 * up by InitProcGlobal.
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
	 */
	MyProc->pid = MyProcPid;	/* marks DummyProc as in use by me */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->errType = STATUS_OK;
	MyProc->xid = InvalidTransactionId;
	MyProc->xmin = InvalidTransactionId;
	MyProc->databaseId = MyDatabaseId;
	MyProc->logRec.xrecoff = 0;
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
	MyProc->waitLock = NULL;
	MyProc->waitHolder = NULL;
	SHMQueueInit(&(MyProc->procHolders));

	/*
	 * Arrange to clean up at process exit.
	 */
	on_shmem_exit(DummyProcKill, 0);

	/*
	 * We might be reusing a semaphore that belonged to a failed process.
	 * So be careful and reinitialize its value here.
	 */
318
	PGSemaphoreReset(&MyProc->sem);
319 320
}

321 322 323
/*
 * Cancel any pending wait for lock, when aborting a transaction.
 *
324 325
 * Returns true if we had been waiting for a lock, else false.
 *
326
 * (Normally, this would only happen if we accept a cancel/die
327
 * interrupt while waiting; but an ereport(ERROR) while waiting is
328 329
 * within the realm of possibility, too.)
 */
330
bool
331 332 333 334
LockWaitCancel(void)
{
	/* Nothing to do if we weren't waiting for a lock */
	if (!waitingForLock)
335 336
		return false;

337 338 339
	waitingForLock = false;

	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
340
	disable_sig_alarm(false);
341 342

	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
343
	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
344 345
	if (MyProc->links.next != INVALID_OFFSET)
		RemoveFromWaitQueue(MyProc);
346
	LWLockRelease(LockMgrLock);
H
Hiroshi Inoue 已提交
347

348 349 350
	/*
	 * Reset the proc wait semaphore to zero.  This is necessary in the
	 * scenario where someone else granted us the lock we wanted before we
B
Bruce Momjian 已提交
351 352 353 354 355
	 * were able to remove ourselves from the wait-list.  The semaphore
	 * will have been bumped to 1 by the would-be grantor, and since we
	 * are no longer going to wait on the sema, we have to force it back
	 * to zero. Otherwise, our next attempt to wait for a lock will fall
	 * through prematurely.
356
	 */
357
	PGSemaphoreReset(&MyProc->sem);
358 359

	/*
B
Bruce Momjian 已提交
360 361
	 * Return true even if we were kicked off the lock before we were able
	 * to remove ourselves.
362 363
	 */
	return true;
H
Hiroshi Inoue 已提交
364
}
365

366

367
/*
368 369 370 371 372 373 374 375
 * ProcReleaseLocks() -- release locks associated with current transaction
 *			at transaction commit or abort
 *
 * At commit, we release only locks tagged with the current transaction's XID,
 * leaving those marked with XID 0 (ie, session locks) undisturbed.  At abort,
 * we release all locks including XID 0, because we need to clean up after
 * a failure.  This logic will need extension if we ever support nested
 * transactions.
376
 *
377
 * Note that user locks are not released in either case.
378 379
 */
void
380
ProcReleaseLocks(bool isCommit)
381
{
382 383
	if (!MyProc)
		return;
384 385 386
	/* If waiting, get off wait queue (should only be needed after error) */
	LockWaitCancel();
	/* Release locks */
387 388
	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc,
				   !isCommit, GetCurrentTransactionId());
389 390 391 392 393
}


/*
 * ProcKill() -- Destroy the per-proc data structure for
394
 *		this process. Release any of its held LW locks.
395 396
 */
static void
397
ProcKill(void)
398
{
399 400 401
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

402
	Assert(MyProc != NULL);
403

404 405 406
	/* Release any LW locks I am holding */
	LWLockReleaseAll();

407 408 409 410 411 412
	/*
	 * Make real sure we release any buffer locks and pins we might be
	 * holding, too.  It is pretty ugly to do this here and not in a
	 * shutdown callback registered by the bufmgr ... but we must do this
	 * *after* LWLockReleaseAll and *before* zapping MyProc.
	 */
413
	AbortBufferIO();
414 415
	UnlockBuffers();
	AtEOXact_Buffers(false);
416

417 418
	/* Get off any wait queue I might be on */
	LockWaitCancel();
419

420
	/* Remove from the standard lock table */
421
	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, true, InvalidTransactionId);
422

423 424
#ifdef USER_LOCKS
	/* Remove from the user lock table */
425
	LockReleaseAll(USER_LOCKMETHOD, MyProc, true, InvalidTransactionId);
426
#endif
427

428
	SpinLockAcquire(ProcStructLock);
429

J
Jan Wieck 已提交
430
	/* Return PGPROC structure (and semaphore) to freelist */
431 432
	MyProc->links.next = procglobal->freeProcs;
	procglobal->freeProcs = MAKE_OFFSET(MyProc);
433

J
Jan Wieck 已提交
434
	/* PGPROC struct isn't mine anymore */
435
	MyProc = NULL;
436

437 438 439 440 441
	SpinLockRelease(ProcStructLock);
}

/*
 * DummyProcKill() -- reduced form of ProcKill for dummy (checkpoint)
 *		processes.	The PGPROC and semaphore are not given back to any
 *		freelist; DummyProc is merely marked as not-in-use.
 */
static void
DummyProcKill(void)
{
	Assert(MyProc != NULL && MyProc == DummyProc);

	/* Let go of every LW lock we still hold */
	LWLockReleaseAll();

	/* Drop buffer locks and pins as well */
	AbortBufferIO();
	UnlockBuffers();
	AtEOXact_Buffers(false);

	/* We cannot be on a regular lock queue, so there is nothing to check */

	/* pid == 0 marks DummyProc as free for the next user */
	MyProc->pid = 0;

	/* From here on the PGPROC no longer belongs to us */
	MyProc = NULL;
}

467

468 469
/*
 * ProcQueue package: routines for putting processes to sleep
 *		and waking them up
 */

/*
 * ProcQueueAlloc -- allocate or attach to a named shared-memory process
 *		queue
 *
 * Returns: pointer to the queue, or NULL if ShmemInitStruct returned NULL
 * Side Effects: a queue that did not exist before is initialized
 *
 * Compiled only when NOT_USED is defined.
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(char *name)
{
	bool		found;
	PROC_QUEUE *queue;

	queue = (PROC_QUEUE *) ShmemInitStruct(name, sizeof(PROC_QUEUE), &found);

	if (queue == NULL)
		return NULL;

	/* Only whoever created the struct initializes it */
	if (!found)
		ProcQueueInit(queue);

	return queue;
}
#endif
494 495 496 497 498

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
499
ProcQueueInit(PROC_QUEUE *queue)
500
{
501 502
	SHMQueueInit(&(queue->links));
	queue->size = 0;
503 504 505 506 507 508
}


/*
 * ProcSleep -- put a process to sleep
 *
509 510
 * Caller must have set MyProc->heldLocks to reflect locks already held
 * on the lockable object by this process (under all XIDs).
511
 *
512
 * Locktable's masterLock must be held at entry, and will be held
513
 * at exit.
514
 *
515
 * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
516
 *
517
 * ASSUME: that no one will fiddle with the queue until after
518
 *		we release the masterLock.
519 520
 *
 * NOTES: The process queue is now a priority queue for locking.
521 522 523
 *
 * P() on the semaphore should put us to sleep.  The process
 * semaphore is normally zero, so when we try to acquire it, we sleep.
524 525
 */
int
526
ProcSleep(LOCKMETHODTABLE *lockMethodTable,
527 528
		  LOCKMODE lockmode,
		  LOCK *lock,
529
		  PROCLOCK *proclock)
530
{
B
Bruce Momjian 已提交
531
	LWLockId	masterLock = lockMethodTable->masterLock;
532 533
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			myHeldLocks = MyProc->heldLocks;
534
	bool		early_deadlock = false;
J
Jan Wieck 已提交
535
	PGPROC	   *proc;
536
	int			i;
537

538
	/*
539 540 541 542 543 544
	 * Determine where to add myself in the wait queue.
	 *
	 * Normally I should go at the end of the queue.  However, if I already
	 * hold locks that conflict with the request of any previous waiter,
	 * put myself in the queue just in front of the first such waiter.
	 * This is not a necessary step, since deadlock detection would move
545 546 547
	 * me to before that waiter anyway; but it's relatively cheap to
	 * detect such a conflict immediately, and avoid delaying till
	 * deadlock timeout.
548
	 *
549 550
	 * Special case: if I find I should go in front of some waiter, check to
	 * see if I conflict with already-held locks or the requests before
551 552
	 * that waiter.  If not, then just grant myself the requested lock
	 * immediately.  This is the same as the test for immediate grant in
553 554
	 * LockAcquire, except we are only considering the part of the wait
	 * queue before my insertion point.
555 556
	 */
	if (myHeldLocks != 0)
V
Vadim B. Mikheev 已提交
557
	{
558 559
		int			aheadRequests = 0;

J
Jan Wieck 已提交
560
		proc = (PGPROC *) MAKE_PTR(waitQueue->links.next);
561
		for (i = 0; i < waitQueue->size; i++)
V
Vadim B. Mikheev 已提交
562
		{
563
			/* Must he wait for me? */
B
Bruce Momjian 已提交
564
			if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
V
Vadim B. Mikheev 已提交
565
			{
566
				/* Must I wait for him ? */
B
Bruce Momjian 已提交
567
				if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks)
568
				{
569
					/*
570 571 572 573
					 * Yes, so we have a deadlock.	Easiest way to clean
					 * up correctly is to call RemoveFromWaitQueue(), but
					 * we can't do that until we are *on* the wait queue.
					 * So, set a flag to check below, and break out of
574
					 * loop.  Also, record deadlock info for later message.
575
					 */
576
					RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
577 578
					early_deadlock = true;
					break;
579
				}
580
				/* I must go before this waiter.  Check special case. */
B
Bruce Momjian 已提交
581
				if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
582 583 584
					LockCheckConflicts(lockMethodTable,
									   lockmode,
									   lock,
585
									   proclock,
586 587
									   MyProc,
									   NULL) == STATUS_OK)
588
				{
589
					/* Skip the wait and just grant myself the lock. */
590
					GrantLock(lock, proclock, lockmode);
591
					return STATUS_OK;
592 593
				}
				/* Break out of loop to put myself before him */
V
Vadim B. Mikheev 已提交
594
				break;
595
			}
596 597
			/* Nope, so advance to next waiter */
			aheadRequests |= (1 << proc->waitLockMode);
J
Jan Wieck 已提交
598
			proc = (PGPROC *) MAKE_PTR(proc->links.next);
V
Vadim B. Mikheev 已提交
599
		}
B
Bruce Momjian 已提交
600

601 602 603 604
		/*
		 * If we fall out of loop normally, proc points to waitQueue head,
		 * so we will insert at tail of queue as desired.
		 */
605 606 607 608
	}
	else
	{
		/* I hold no locks, so I can't push in front of anyone. */
J
Jan Wieck 已提交
609
		proc = (PGPROC *) &(waitQueue->links);
V
Vadim B. Mikheev 已提交
610
	}
611

612 613 614
	/*
	 * Insert self into queue, ahead of the given proc (or at tail of
	 * queue).
615
	 */
616
	SHMQueueInsertBefore(&(proc->links), &(MyProc->links));
B
Bruce Momjian 已提交
617
	waitQueue->size++;
618

619
	lock->waitMask |= (1 << lockmode);
620

J
Jan Wieck 已提交
621
	/* Set up wait information in PGPROC object, too */
622
	MyProc->waitLock = lock;
623
	MyProc->waitHolder = proclock;
624 625
	MyProc->waitLockMode = lockmode;

626
	MyProc->errType = STATUS_OK;	/* initialize result for success */
627 628 629

	/*
	 * If we detected deadlock, give up without waiting.  This must agree
630
	 * with CheckDeadLock's recovery code, except that we shouldn't
631
	 * release the semaphore since we haven't tried to lock it yet.
632 633 634 635 636 637 638
	 */
	if (early_deadlock)
	{
		RemoveFromWaitQueue(MyProc);
		MyProc->errType = STATUS_ERROR;
		return STATUS_ERROR;
	}
639

640 641 642
	/* mark that we are waiting for a lock */
	waitingForLock = true;

643
	/*
644
	 * Release the locktable's masterLock.
645
	 *
646 647 648 649
	 * NOTE: this may also cause us to exit critical-section state, possibly
	 * allowing a cancel/die interrupt to be accepted. This is OK because
	 * we have recorded the fact that we are waiting for a lock, and so
	 * LockWaitCancel will clean up if cancel/die happens.
650
	 */
651
	LWLockRelease(masterLock);
652

653
	/*
654 655 656 657 658 659 660
	 * Set timer so we can wake up after awhile and check for a deadlock.
	 * If a deadlock is detected, the handler releases the process's
	 * semaphore and sets MyProc->errType = STATUS_ERROR, allowing us to
	 * know that we must report failure rather than success.
	 *
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
661
	 */
662
	if (!enable_sig_alarm(DeadlockTimeout, false))
663
		elog(FATAL, "could not set timer for process wakeup");
664

665
	/*
666
	 * If someone wakes us between LWLockRelease and PGSemaphoreLock,
B
Bruce Momjian 已提交
667
	 * PGSemaphoreLock will not block.	The wakeup is "saved" by the
668
	 * semaphore implementation.  Note also that if CheckDeadLock is
669
	 * invoked but does not detect a deadlock, PGSemaphoreLock() will
670 671
	 * continue to wait.  There used to be a loop here, but it was useless
	 * code...
672 673 674 675 676 677
	 *
	 * We pass interruptOK = true, which eliminates a window in which
	 * cancel/die interrupts would be held off undesirably.  This is a
	 * promise that we don't mind losing control to a cancel/die interrupt
	 * here.  We don't, because we have no state-change work to do after
	 * being granted the lock (the grantor did it all).
678
	 */
679
	PGSemaphoreLock(&MyProc->sem, true);
680

681
	/*
682
	 * Disable the timer, if it's still running
B
Bruce Momjian 已提交
683
	 */
684
	if (!disable_sig_alarm(false))
685
		elog(FATAL, "could not disable timer for process wakeup");
B
Bruce Momjian 已提交
686

687 688 689 690 691
	/*
	 * Now there is nothing for LockWaitCancel to do.
	 */
	waitingForLock = false;

692
	/*
693
	 * Re-acquire the locktable's masterLock.
694
	 */
695
	LWLockAcquire(masterLock, LW_EXCLUSIVE);
696

697 698 699 700
	/*
	 * We don't have to do anything else, because the awaker did all the
	 * necessary update of the lock table and MyProc.
	 */
701
	return MyProc->errType;
702 703 704 705 706 707
}


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
708
 *	 Also remove the process from the wait queue and set its links invalid.
709
 *	 RETURN: the next process in the wait queue.
710 711 712 713
 *
 * XXX: presently, this code is only used for the "success" case, and only
 * works correctly for that case.  To clean up in failure case, would need
 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
714
 */
J
Jan Wieck 已提交
715 716
PGPROC *
ProcWakeup(PGPROC *proc, int errType)
717
{
J
Jan Wieck 已提交
718
	PGPROC	   *retProc;
719

720
	/* assume that masterLock has been acquired */
721

722
	/* Proc should be sleeping ... */
723 724
	if (proc->links.prev == INVALID_OFFSET ||
		proc->links.next == INVALID_OFFSET)
J
Jan Wieck 已提交
725
		return (PGPROC *) NULL;
726

727
	/* Save next process before we zap the list link */
J
Jan Wieck 已提交
728
	retProc = (PGPROC *) MAKE_PTR(proc->links.next);
729

730
	/* Remove process from wait queue */
731
	SHMQueueDelete(&(proc->links));
732
	(proc->waitLock->waitProcs.size)--;
733

734 735 736
	/* Clean up process' state and pass it the ok/fail signal */
	proc->waitLock = NULL;
	proc->waitHolder = NULL;
737 738
	proc->errType = errType;

739
	/* And awaken it */
740
	PGSemaphoreUnlock(&proc->sem);
741 742

	return retProc;
743 744 745 746
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
747 748
 *		released (or a prior waiter is aborted).  Scan all waiters
 *		for lock, waken any that are no longer blocked.
749
 */
750 751
void
ProcLockWakeup(LOCKMETHODTABLE *lockMethodTable, LOCK *lock)
752
{
753 754
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
J
Jan Wieck 已提交
755
	PGPROC	   *proc;
756
	int			aheadRequests = 0;
M
 
Marc G. Fournier 已提交
757

758
	Assert(queue_size >= 0);
759

760 761
	if (queue_size == 0)
		return;
762

J
Jan Wieck 已提交
763
	proc = (PGPROC *) MAKE_PTR(waitQueue->links.next);
764

765 766
	while (queue_size-- > 0)
	{
B
Bruce Momjian 已提交
767
		LOCKMODE	lockmode = proc->waitLockMode;
M
 
Marc G. Fournier 已提交
768 769

		/*
770 771
		 * Waken if (a) doesn't conflict with requests of earlier waiters,
		 * and (b) doesn't conflict with already-held locks.
M
 
Marc G. Fournier 已提交
772
		 */
B
Bruce Momjian 已提交
773
		if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
774 775 776 777 778 779
			LockCheckConflicts(lockMethodTable,
							   lockmode,
							   lock,
							   proc->waitHolder,
							   proc,
							   NULL) == STATUS_OK)
M
 
Marc G. Fournier 已提交
780
		{
781 782 783
			/* OK to waken */
			GrantLock(lock, proc->waitHolder, lockmode);
			proc = ProcWakeup(proc, STATUS_OK);
B
Bruce Momjian 已提交
784

785
			/*
B
Bruce Momjian 已提交
786 787 788
			 * ProcWakeup removes proc from the lock's waiting process
			 * queue and returns the next proc in chain; don't use proc's
			 * next-link, because it's been cleared.
789
			 */
M
 
Marc G. Fournier 已提交
790
		}
791
		else
792
		{
B
Bruce Momjian 已提交
793 794 795 796
			/*
			 * Cannot wake this guy. Remember his request for later
			 * checks.
			 */
797
			aheadRequests |= (1 << lockmode);
J
Jan Wieck 已提交
798
			proc = (PGPROC *) MAKE_PTR(proc->links.next);
799
		}
M
 
Marc G. Fournier 已提交
800
	}
801 802

	Assert(waitQueue->size >= 0);
803 804 805
}

/* --------------------
806
 * We only get to this routine if we got SIGALRM after DeadlockTimeout
807 808 809 810
 * while waiting for a lock to be released by some other process.  Look
 * to see if there's a deadlock; if not, just return and continue waiting.
 * If we have a real deadlock, remove ourselves from the lock's wait queue
 * and signal an error to ProcSleep.
811 812
 * --------------------
 */
813
static void
814
CheckDeadLock(void)
815
{
816
	/*
817 818
	 * Acquire locktable lock.	Note that the deadlock check interrupt had
	 * better not be enabled anywhere that this process itself holds the
819
	 * locktable lock, else this will wait forever.  Also note that
820 821
	 * LWLockAcquire creates a critical section, so that this routine
	 * cannot be interrupted by cancel/die interrupts.
822
	 */
823
	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
824

825
	/*
826 827 828
	 * Check to see if we've been awoken by anyone in the interim.
	 *
	 * If we have we can return and resume our transaction -- happy day.
829 830
	 * Before we are awoken the process releasing the lock grants it to us
	 * so we know that we don't have to wait anymore.
831
	 *
832
	 * We check by looking to see if we've been unlinked from the wait queue.
833 834 835
	 * This is quicker than checking our semaphore's state, since no
	 * kernel call is needed, and it is safe because we hold the locktable
	 * lock.
836 837 838 839
	 */
	if (MyProc->links.prev == INVALID_OFFSET ||
		MyProc->links.next == INVALID_OFFSET)
	{
840
		LWLockRelease(LockMgrLock);
841 842 843
		return;
	}

844
#ifdef LOCK_DEBUG
B
Bruce Momjian 已提交
845 846
	if (Debug_deadlocks)
		DumpAllLocks();
847 848
#endif

849
	if (!DeadLockCheck(MyProc))
B
Bruce Momjian 已提交
850
	{
851
		/* No deadlock, so keep waiting */
852
		LWLockRelease(LockMgrLock);
B
Bruce Momjian 已提交
853 854 855
		return;
	}

856
	/*
857 858 859
	 * Oops.  We have a deadlock.
	 *
	 * Get this process out of wait state.
860
	 */
861 862
	RemoveFromWaitQueue(MyProc);

863 864
	/*
	 * Set MyProc->errType to STATUS_ERROR so that ProcSleep will report
865
	 * an error after we return from the signal handler.
866 867
	 */
	MyProc->errType = STATUS_ERROR;
868

869 870 871
	/*
	 * Unlock my semaphore so that the interrupted ProcSleep() call can
	 * finish.
872
	 */
873
	PGSemaphoreUnlock(&MyProc->sem);
874

875 876 877 878 879 880 881 882 883
	/*
	 * We're done here.  Transaction abort caused by the error that
	 * ProcSleep will raise will cause any other locks we hold to be
	 * released, thus allowing other processes to wake up; we don't need
	 * to do that here. NOTE: an exception is that releasing locks we hold
	 * doesn't consider the possibility of waiters that were blocked
	 * behind us on the lock we just failed to get, and might now be
	 * wakable because we're not in front of them anymore.  However,
	 * RemoveFromWaitQueue took care of waking up any such processes.
884
	 */
885
	LWLockRelease(LockMgrLock);
886 887 888
}


889 890 891 892 893 894 895 896 897 898 899 900
/*
 * ProcWaitForSignal - wait for a signal from another backend.
 *
 * This can share the semaphore normally used for waiting for locks,
 * since a backend could never be waiting for a lock and a signal at
 * the same time.  As with locks, it's OK if the signal arrives just
 * before we actually reach the waiting state.
 */
void
ProcWaitForSignal(void)
{
	waitingForSignal = true;
901
	PGSemaphoreLock(&MyProc->sem, true);
902 903 904 905 906 907 908 909 910 911 912 913 914
	waitingForSignal = false;
}

/*
 * ProcCancelWaitForSignal - clean up an aborted wait for signal
 *
 * We need this in case the signal arrived after we aborted waiting,
 * or if it arrived but we never reached ProcWaitForSignal() at all.
 * Caller should call this after resetting the signal request status.
 */
void
ProcCancelWaitForSignal(void)
{
915
	PGSemaphoreReset(&MyProc->sem);
916 917 918 919 920 921 922 923 924
	waitingForSignal = false;
}

/*
 * ProcSendSignal - signal the backend identified by the given BackendId
 *
 * Does nothing if BackendIdGetProc finds no PGPROC for procId.
 */
void
ProcSendSignal(BackendId procId)
{
	PGPROC	   *target = BackendIdGetProc(procId);

	if (target == NULL)
		return;

	PGSemaphoreUnlock(&target->sem);
}


932 933 934 935 936 937 938 939 940
/*****************************************************************************
 * SIGALRM interrupt support
 *
 * Maybe these should be in pqsignal.c?
 *****************************************************************************/

/*
 * Enable the SIGALRM interrupt to fire after the specified delay
 *
941
 * Delay is given in milliseconds.	Caller should be sure a SIGALRM
942 943
 * signal handler is installed before this is called.
 *
944 945
 * This code properly handles nesting of deadlock timeout alarms within
 * statement timeout alarms.
946
 *
947 948 949
 * Returns TRUE if okay, FALSE on failure.
 */
bool
950
enable_sig_alarm(int delayms, bool is_statement_timeout)
951
{
952 953 954
#ifdef WIN32
# warning add Win32 timer
#else
955
	struct timeval fin_time;
956
#ifndef __BEOS__
957
	struct itimerval timeval;
958
#else
959
	bigtime_t	time_interval;
960
#endif
961

962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
	/* Compute target timeout time if we will need it */
	if (is_statement_timeout || statement_timeout_active)
	{
		gettimeofday(&fin_time, NULL);
		fin_time.tv_sec += delayms / 1000;
		fin_time.tv_usec += (delayms % 1000) * 1000;
		if (fin_time.tv_usec >= 1000000)
		{
			fin_time.tv_sec++;
			fin_time.tv_usec -= 1000000;
		}
	}

	if (is_statement_timeout)
	{
		/* Begin statement-level timeout */
		Assert(!deadlock_timeout_active);
		statement_fin_time = fin_time;
		statement_timeout_active = true;
	}
	else if (statement_timeout_active)
	{
		/*
		 * Begin deadlock timeout with statement-level timeout active
		 *
		 * Here, we want to interrupt at the closer of the two timeout
		 * times.  If fin_time >= statement_fin_time then we need not
		 * touch the existing timer setting; else set up to interrupt
		 * at the deadlock timeout time.
		 *
		 * NOTE: in this case it is possible that this routine will be
		 * interrupted by the previously-set timer alarm.  This is okay
		 * because the signal handler will do only what it should do according
		 * to the state variables.  The deadlock checker may get run earlier
		 * than normal, but that does no harm.
		 */
		deadlock_timeout_active = true;
		if (fin_time.tv_sec > statement_fin_time.tv_sec ||
			(fin_time.tv_sec == statement_fin_time.tv_sec &&
			 fin_time.tv_usec >= statement_fin_time.tv_usec))
			return true;
	}
	else
	{
		/* Begin deadlock timeout with no statement-level timeout */
		deadlock_timeout_active = true;
	}
1009

1010
	/* If we reach here, okay to set the timer interrupt */
1011
#ifndef __BEOS__
1012 1013 1014
	MemSet(&timeval, 0, sizeof(struct itimerval));
	timeval.it_value.tv_sec = delayms / 1000;
	timeval.it_value.tv_usec = (delayms % 1000) * 1000;
1015
	if (setitimer(ITIMER_REAL, &timeval, NULL))
1016 1017 1018
		return false;
#else
	/* BeOS doesn't have setitimer, but has set_alarm */
1019
	time_interval = delayms * 1000;		/* usecs */
1020
	if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0)
1021 1022
		return false;
#endif
1023
#endif
1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036
	return true;
}

/*
 * Cancel the SIGALRM timer, either for a deadlock timeout or a statement
 * timeout.  If a deadlock timeout is canceled, any active statement timeout
 * remains in force.
 *
 * Returns TRUE if okay, FALSE on failure.
 */
bool
disable_sig_alarm(bool is_statement_timeout)
{
1037 1038 1039
#ifdef WIN32
#warning add Win32 timer
#else
1040 1041 1042 1043 1044 1045 1046 1047
	/*
	 * Always disable the interrupt if it is active; this avoids being
	 * interrupted by the signal handler and thereby possibly getting
	 * confused.
	 *
	 * We will re-enable the interrupt if necessary in CheckStatementTimeout.
	 */
	if (statement_timeout_active || deadlock_timeout_active)
1048 1049
	{
#ifndef __BEOS__
1050
		struct itimerval timeval;
1051

1052 1053
		MemSet(&timeval, 0, sizeof(struct itimerval));
		if (setitimer(ITIMER_REAL, &timeval, NULL))
1054
		{
1055 1056 1057
			statement_timeout_active = deadlock_timeout_active = false;
			return false;
		}
1058
#else
1059 1060 1061 1062 1063
		/* BeOS doesn't have setitimer, but has set_alarm */
		if (set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM) < 0)
		{
			statement_timeout_active = deadlock_timeout_active = false;
			return false;
1064
		}
1065
#endif
1066 1067
	}

1068 1069 1070 1071
	/* Always cancel deadlock timeout, in case this is error cleanup */
	deadlock_timeout_active = false;

	/* Cancel or reschedule statement timeout */
1072
	if (is_statement_timeout)
1073 1074 1075 1076 1077 1078
		statement_timeout_active = false;
	else if (statement_timeout_active)
	{
		if (!CheckStatementTimeout())
			return false;
	}
1079
#endif
1080 1081 1082
	return true;
}

1083

1084
/*
1085 1086 1087
 * Check for statement timeout.  If the timeout time has come,
 * trigger a query-cancel interrupt; if not, reschedule the SIGALRM
 * interrupt to occur at the right time.
1088
 *
1089
 * Returns true if okay, false if failed to set the interrupt.
1090
 */
1091 1092
static bool
CheckStatementTimeout(void)
1093
{
1094
	struct timeval now;
B
Bruce Momjian 已提交
1095

1096 1097 1098 1099
	if (!statement_timeout_active)
		return true;			/* do nothing if not active */

	gettimeofday(&now, NULL);
1100

1101 1102 1103
	if (now.tv_sec > statement_fin_time.tv_sec ||
		(now.tv_sec == statement_fin_time.tv_sec &&
		 now.tv_usec >= statement_fin_time.tv_usec))
1104
	{
1105 1106 1107 1108 1109 1110 1111
		/* Time to die */
		statement_timeout_active = false;
		kill(MyProcPid, SIGINT);
	}
	else
	{
		/* Not time yet, so (re)schedule the interrupt */
1112 1113 1114
#ifdef WIN32
#warning add win32 timer
#else
1115
#ifndef __BEOS__
1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126
		struct itimerval timeval;

		MemSet(&timeval, 0, sizeof(struct itimerval));
		timeval.it_value.tv_sec = statement_fin_time.tv_sec - now.tv_sec;
		timeval.it_value.tv_usec = statement_fin_time.tv_usec - now.tv_usec;
		if (timeval.it_value.tv_usec < 0)
		{
			timeval.it_value.tv_sec--;
			timeval.it_value.tv_usec += 1000000;
		}
		if (setitimer(ITIMER_REAL, &timeval, NULL))
1127 1128 1129
			return false;
#else
		/* BeOS doesn't have setitimer, but has set_alarm */
1130
		bigtime_t	time_interval;
B
Bruce Momjian 已提交
1131

1132 1133 1134 1135
		time_interval =
			(statement_fin_time.tv_sec - now.tv_sec) * 1000000 +
			(statement_fin_time.tv_usec - now.tv_usec);
		if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0)
1136
			return false;
1137
#endif
1138 1139 1140
#endif
	}

1141 1142
	return true;
}
1143 1144 1145


/*
1146 1147 1148 1149 1150 1151
 * Signal handler for SIGALRM
 *
 * Process deadlock check and/or statement timeout check, as needed.
 * To avoid various edge cases, we must be careful to do nothing
 * when there is nothing to be done.  We also need to be able to
 * reschedule the timer interrupt if called before end of statement.
1152 1153 1154 1155
 */
void
handle_sig_alarm(SIGNAL_ARGS)
{
1156 1157 1158
	int			save_errno = errno;

	if (deadlock_timeout_active)
1159
	{
1160
		deadlock_timeout_active = false;
1161 1162
		CheckDeadLock();
	}
1163 1164 1165 1166 1167

	if (statement_timeout_active)
		(void) CheckStatementTimeout();

	errno = save_errno;
1168
}