/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.92 2001/01/14 05:08:16 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 *	Each postgres backend gets one of these.  We'll use it to
 *	clean up after the process should the process suddenly die.
 *
 *
 * Interface (a):
 *		ProcSleep(), ProcWakeup(), ProcLockWakeup(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocing memory
 *
 * Locking and waiting for buffers can cause the backend to be
 * put to sleep.  Whoever releases the lock, etc. wakes the
 * process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with current transaction
 *
 * ProcKill -- destroys the shared memory state (and locks)
 *		associated with the process.
 *
 * 5/15/91 -- removed the buffer pool based lock chain in favor
 *		of a shared memory lock chain.	The write-protection is
 *		more expensive if the lock chain is in the buffer pool.
 *		The only reason I kept the lock chain in the buffer pool
 *		in the first place was to allow the lock table to grow larger
 *		than available shared memory and that isn't going to work
 *		without a lot of unimplemented support anyway.
 *
 * 4/7/95 -- instead of allocating a set of 1 semaphore per process, we
 *		allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores
 *		shared among backends (we keep a few sets of semaphores around).
 *		This is so that we can support more backends. (system-wide semaphore
 *		sets run out pretty fast.)				  -ay 4/95
 *
 * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.92 2001/01/14 05:08:16 tgl Exp $
 */
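/*
 * Rough sketch of how these routines are typically driven.  The actual call
 * sites live in other modules (backend startup, the lock manager, transaction
 * commit/abort, and the postmaster), so the sequence below is illustrative
 * rather than a definitive call graph:
 *
 *		InitProcGlobal(maxBackends);		-- postmaster, at shared-memory init
 *		InitProcess();						-- each backend, at startup
 *		...
 *		ProcSleep(lockctl, lockmode, lock, holder);	-- lock manager, on conflict
 *		ProcLockWakeup(lockmethod, lock);	-- lock releaser, to wake waiters
 *		ProcReleaseLocks(isCommit);			-- at transaction commit/abort
 *		...
 *		ProcKill();							-- backend exit, via on_shmem_exit
 *		ProcRemove(pid);					-- postmaster, after a backend exits
 */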
#include "postgres.h"

#include <errno.h>
#include <sys/time.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>

#if defined(solaris_sparc) || defined(__CYGWIN__)
#include <sys/ipc.h>
#include <sys/sem.h>
#endif

#include "miscadmin.h"

#if defined(__darwin__)
#include "port/darwin/sem.h"
#endif

/* In Ultrix and QNX, sem.h must be included after ipc.h */
#ifdef HAVE_SYS_SEM_H
#include <sys/sem.h>
#endif

#include "access/xact.h"
#include "storage/proc.h"


int DeadlockTimeout = 1000;

/* --------------------
 * Spin lock for manipulating the shared process data structure:
 * ProcGlobal.... Adding an extra spin lock seemed like the smallest
 * hack to get around reading and updating this structure in shared
 * memory. -mer 17 July 1991
 * --------------------
 */
SPINLOCK	ProcStructLock;

static PROC_HDR *ProcGlobal = NULL;

PROC	   *MyProc = NULL;

static bool waitingForLock = false;

static void ProcKill(void);
static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum);
static void ProcFreeSem(IpcSemaphoreId semId, int semNum);
static void ZeroProcSemaphore(PROC *proc);
static void ProcFreeAllSemaphores(void);


/*
 * InitProcGlobal -
 *	  initializes the global process table. We put it here so that
 *	  the postmaster can do this initialization. (ProcFreeAllSemaphores needs
 *	  to read this table on exiting the postmaster. If we have the first
 *	  backend do this, starting up and killing the postmaster without
 *	  starting any backends will be a problem.)
 *
 *	  We also allocate all the per-process semaphores we will need to support
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxBackends higher than his kernel will support, he'll find out sooner
 *	  rather than later.
 */
void
InitProcGlobal(int maxBackends)
{
	bool		found = false;

	/* attach to the free list */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);

	/* --------------------
	 * We're the first - initialize.
	 * XXX if found should ever be true, it is a sign of impending doom ...
	 * ought to complain if so?
	 * --------------------
	 */
	if (!found)
	{
		int			i;

		ProcGlobal->freeProcs = INVALID_OFFSET;
		for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
		{
			ProcGlobal->procSemIds[i] = -1;
			ProcGlobal->freeSemMap[i] = 0;
		}

		/*
		 * Arrange to delete semas on exit --- set this up now so that we
		 * will clean up if pre-allocation fails.  We use our own freeproc,
		 * rather than IpcSemaphoreCreate's removeOnExit option, because
		 * we don't want to fill up the on_shmem_exit list with a separate
		 * entry for each semaphore set.
		 */
		on_shmem_exit(ProcFreeAllSemaphores, 0);

		/*
		 * Pre-create the semaphores for the first maxBackends processes.
		 */
		Assert(maxBackends > 0 && maxBackends <= MAXBACKENDS);

		for (i = 0; i < ((maxBackends-1)/PROC_NSEMS_PER_SET+1); i++)
		{
			IpcSemaphoreId		semId;

			semId = IpcSemaphoreCreate(PROC_NSEMS_PER_SET,
									   IPCProtection,
									   1,
									   false);
			ProcGlobal->procSemIds[i] = semId;
		}
	}
}
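/*
 * Worked example of the pre-allocation arithmetic above (the numbers are
 * only illustrative; PROC_NSEMS_PER_SET is defined in proc.h): with
 * maxBackends = 32 and PROC_NSEMS_PER_SET = 16, the loop runs
 * (32-1)/16 + 1 = 2 times and creates two sets of 16 semaphores -- enough
 * for 32 backends.  The sets are released by ProcFreeAllSemaphores at
 * shmem_exit time.
 */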

/* ------------------------
 * InitProc -- create a per-process data structure for this process
 * used by the lock manager on semaphore queues.
 * ------------------------
 */
void
InitProcess(void)
{
	bool		found = false;
	unsigned long location,
				myOffset;

	SpinAcquire(ProcStructLock);

	/* attach to the ProcGlobal structure */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
	if (!found)
	{
		/* this should not happen. InitProcGlobal() is called before this. */
		elog(STOP, "InitProcess: Proc Header uninitialized");
	}

	if (MyProc != NULL)
	{
		SpinRelease(ProcStructLock);
		elog(ERROR, "ProcInit: you already exist");
	}

	/* try to get a proc struct from the free list first */

	myOffset = ProcGlobal->freeProcs;

	if (myOffset != INVALID_OFFSET)
	{
		MyProc = (PROC *) MAKE_PTR(myOffset);
		ProcGlobal->freeProcs = MyProc->links.next;
	}
	else
	{

		/*
		 * have to allocate one.  We can't use the normal shmem index
		 * table mechanism because the proc structure is stored by PID
		 * instead of by a global name (need to look it up by PID when we
		 * cleanup dead processes).
		 */

		MyProc = (PROC *) ShmemAlloc(sizeof(PROC));
		if (!MyProc)
		{
			SpinRelease(ProcStructLock);
			elog(FATAL, "cannot create new proc: out of memory");
		}

		/* this cannot be initialized until after the buffer pool */
		SHMQueueInit(&(MyProc->lockQueue));
	}

	/*
	 * zero out the spin lock counts and set the sLocks field for
	 * ProcStructLock to 1 as we have acquired this spinlock above but
	 * didn't record it since we didn't have MyProc until now.
	 */
	MemSet(MyProc->sLocks, 0, sizeof(MyProc->sLocks));
	MyProc->sLocks[ProcStructLock] = 1;

	/*
	 * Set up a wait-semaphore for the proc.
	 */
	if (IsUnderPostmaster)
	{
		ProcGetNewSemIdAndNum(&MyProc->sem.semId, &MyProc->sem.semNum);
		/*
		 * we might be reusing a semaphore that belongs to a dead backend.
		 * So be careful and reinitialize its value here.
		 */
		ZeroProcSemaphore(MyProc);
	}
	else
	{
		MyProc->sem.semId = -1;
		MyProc->sem.semNum = -1;
	}

	MyProc->pid = MyProcPid;
	MyProc->databaseId = MyDatabaseId;
	MyProc->xid = InvalidTransactionId;
	MyProc->xmin = InvalidTransactionId;

	/* ----------------------
	 * Release the lock.
	 * ----------------------
	 */
	SpinRelease(ProcStructLock);

	/* -------------------------
	 * Install ourselves in the shmem index table.	The name to
	 * use is determined by the OS-assigned process id.  That
	 * allows the cleanup process to find us after any untimely
	 * exit.
	 * -------------------------
	 */
	location = MAKE_OFFSET(MyProc);
	if ((!ShmemPIDLookup(MyProcPid, &location)) ||
		(location != MAKE_OFFSET(MyProc)))
		elog(STOP, "InitProcess: ShmemPID table broken");

	MyProc->errType = NO_ERROR;
	SHMQueueElemInit(&(MyProc->links));

	on_shmem_exit(ProcKill, 0);
}

/*
 * Initialize the proc's wait-semaphore to count zero.
 */
static void
ZeroProcSemaphore(PROC *proc)
{
	union semun		semun;

	semun.val = 0;
	if (semctl(proc->sem.semId, proc->sem.semNum, SETVAL, semun) < 0)
	{
		fprintf(stderr, "ZeroProcSemaphore: semctl(id=%d,SETVAL) failed: %s\n",
				proc->sem.semId, strerror(errno));
		proc_exit(255);
	}
}

/*
 * Remove a proc from the wait-queue it is on
 * (caller must know it is on one).
 * Locktable lock must be held by caller.
 *
 * NB: this does not remove the process' holder object, nor the lock object,
 * even though their holder counts might now have gone to zero.  That will
 * happen during a subsequent LockReleaseAll call, which we expect will happen
 * during transaction cleanup.  (Removal of a proc from its wait queue by
 * this routine can only happen if we are aborting the transaction.)
 */
static void
RemoveFromWaitQueue(PROC *proc)
{
	LOCK   *waitLock = proc->waitLock;
	LOCKMODE lockmode = proc->waitLockMode;

	/* Make sure proc is waiting */
	Assert(proc->links.next != INVALID_OFFSET);
	Assert(waitLock);
	Assert(waitLock->waitProcs.size > 0);

	/* Remove proc from lock's wait queue */
	SHMQueueDelete(&(proc->links));
	waitLock->waitProcs.size--;

	/* Undo increments of holder counts by waiting process */
	Assert(waitLock->nHolding > 0);
	Assert(waitLock->nHolding > proc->waitLock->nActive);
	waitLock->nHolding--;
	Assert(waitLock->holders[lockmode] > 0);
	waitLock->holders[lockmode]--;
	/* don't forget to clear waitMask bit if appropriate */
	if (waitLock->activeHolders[lockmode] == waitLock->holders[lockmode])
		waitLock->waitMask &= ~(1 << lockmode);

	/* Clean up the proc's own state */
	SHMQueueElemInit(&(proc->links));
	proc->waitLock = NULL;
	proc->waitHolder = NULL;

	/* See if any other waiters for the lock can be woken up now */
	ProcLockWakeup(LOCK_LOCKMETHOD(*waitLock), waitLock);
}

/*
 * Cancel any pending wait for lock, when aborting a transaction.
 *
 * (Normally, this would only happen if we accept a cancel/die
 * interrupt while waiting; but an elog(ERROR) while waiting is
 * within the realm of possibility, too.)
 */
void
LockWaitCancel(void)
{
	/* Nothing to do if we weren't waiting for a lock */
	if (!waitingForLock)
		return;
	waitingForLock = false;

	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
#ifndef __BEOS__
	{
		struct itimerval timeval,
						 dummy;

		MemSet(&timeval, 0, sizeof(struct itimerval));
		setitimer(ITIMER_REAL, &timeval, &dummy);
	}
#else
	/* BeOS doesn't have setitimer, but has set_alarm */
    set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM);
#endif /* __BEOS__ */

	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
	LockLockTable();
	if (MyProc->links.next != INVALID_OFFSET)
		RemoveFromWaitQueue(MyProc);
	UnlockLockTable();

	/*
	 * Reset the proc wait semaphore to zero.  This is necessary in the
	 * scenario where someone else granted us the lock we wanted before we
	 * were able to remove ourselves from the wait-list.  The semaphore will
	 * have been bumped to 1 by the would-be grantor, and since we are no
	 * longer going to wait on the sema, we have to force it back to zero.
	 * Otherwise, our next attempt to wait for a lock will fall through
	 * prematurely.
	 */
	ZeroProcSemaphore(MyProc);
}
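/*
 * Illustrative timeline for the semaphore-reset case handled above (the
 * other backend's steps are schematic):
 *
 *		waiter:		blocks on its semaphore inside ProcSleep()
 *		waiter:		gets a cancel/die interrupt, enters LockWaitCancel()
 *		grantor:	grants the lock anyway and unlocks the waiter's
 *					semaphore (its count goes to 1)
 *		waiter:		finds itself already off the wait queue, but the
 *					semaphore is still 1, so ZeroProcSemaphore() forces it
 *					back to 0 and the next ProcSleep() won't fall through
 *					prematurely.
 */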


/*
 * ProcReleaseLocks() -- release locks associated with current transaction
 *			at transaction commit or abort
 *
 * At commit, we release only locks tagged with the current transaction's XID,
 * leaving those marked with XID 0 (ie, session locks) undisturbed.  At abort,
 * we release all locks including XID 0, because we need to clean up after
 * a failure.  This logic will need extension if we ever support nested
 * transactions.
 *
 * Note that user locks are not released in either case.
 */
void
ProcReleaseLocks(bool isCommit)
{
	if (!MyProc)
		return;
	/* If waiting, get off wait queue (should only be needed after error) */
	LockWaitCancel();
	/* Release locks */
	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc,
				   !isCommit, GetCurrentTransactionId());
}
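/*
 * Typical call pattern (the transaction machinery is the usual caller;
 * shown here only as a sketch):
 *
 *		ProcReleaseLocks(true);		-- commit: release XID-tagged locks only
 *		ProcReleaseLocks(false);	-- abort: release session (XID 0) locks too
 */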

/*
 * ProcRemove -
 *	  called by the postmaster to clean up the global tables after a
 *	  backend exits.  This also frees up the proc's wait semaphore.
 */
bool
ProcRemove(int pid)
{
	SHMEM_OFFSET location;
	PROC	   *proc;

	location = ShmemPIDDestroy(pid);
	if (location == INVALID_OFFSET)
		return FALSE;
	proc = (PROC *) MAKE_PTR(location);

	SpinAcquire(ProcStructLock);

	ProcFreeSem(proc->sem.semId, proc->sem.semNum);

	proc->links.next = ProcGlobal->freeProcs;
	ProcGlobal->freeProcs = MAKE_OFFSET(proc);

	SpinRelease(ProcStructLock);

	return TRUE;
}

/*
 * ProcKill() -- Destroy the per-proc data structure for
 *		this process. Release any of its held spin locks.
 *
 * This is done inside the backend process before it exits.
 * ProcRemove, above, will be done by the postmaster afterwards.
 */
static void
ProcKill(void)
{
	Assert(MyProc);

	/* Release any spinlocks I am holding */
	ProcReleaseSpins(MyProc);

	/* Get off any wait queue I might be on */
	LockWaitCancel();

	/* Remove from the standard lock table */
	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, true, InvalidTransactionId);

#ifdef USER_LOCKS
	/* Remove from the user lock table */
	LockReleaseAll(USER_LOCKMETHOD, MyProc, true, InvalidTransactionId);
#endif

	MyProc = NULL;
}

/*
 * ProcQueue package: routines for putting processes to sleep
 *		and  waking them up
 */

/*
 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
 *
 * Returns: a pointer to the queue or NULL
 * Side Effects: Initializes the queue if we allocated one
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(char *name)
{
	bool		found;
	PROC_QUEUE *queue = (PROC_QUEUE *)
		ShmemInitStruct(name, sizeof(PROC_QUEUE), &found);

	if (!queue)
		return NULL;
	if (!found)
		ProcQueueInit(queue);
	return queue;
}

#endif

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
ProcQueueInit(PROC_QUEUE *queue)
{
	SHMQueueInit(&(queue->links));
	queue->size = 0;
}


/*
 * ProcSleep -- put a process to sleep
 *
 * P() on the semaphore should put us to sleep.  The process
 * semaphore is normally zero, so when we try to acquire it, we sleep.
 *
 * Locktable's spinlock must be held at entry, and will be held
 * at exit.
 *
 * Result is NO_ERROR if we acquired the lock, STATUS_ERROR if not (deadlock).
 *
 * ASSUME: that no one will fiddle with the queue until after
 *		we release the spin lock.
 *
 * NOTES: The process queue is now a priority queue for locking.
 */
int
ProcSleep(LOCKMETHODCTL *lockctl,
		  LOCKMODE lockmode,
		  LOCK *lock,
		  HOLDER *holder)
{
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	SPINLOCK	spinlock = lockctl->masterLock;
	int			myMask = (1 << lockmode);
	int			waitMask = lock->waitMask;
	PROC	   *proc;
	int			i;
	int			aheadHolders[MAX_LOCKMODES];
	bool		selfConflict = (lockctl->conflictTab[lockmode] & myMask),
				prevSame = false;
#ifndef __BEOS__
	struct itimerval timeval,
				dummy;
#else
    bigtime_t time_interval;
#endif

	MyProc->waitLock = lock;
	MyProc->waitHolder = holder;
	MyProc->waitLockMode = lockmode;
	/* We assume the caller set up MyProc->holdLock */

	proc = (PROC *) MAKE_PTR(waitQueue->links.prev);

	/* if we don't conflict with any waiter - be first in queue */
	if (!(lockctl->conflictTab[lockmode] & waitMask))
		goto ins;

	for (i = 1; i < MAX_LOCKMODES; i++)
		aheadHolders[i] = lock->activeHolders[i];
	(aheadHolders[lockmode])++;

	for (i = 0; i < waitQueue->size; i++)
	{
		/* am I waiting for him ? */
		if (lockctl->conflictTab[lockmode] & proc->holdLock)
		{
			/* is he waiting for me ? */
			if (lockctl->conflictTab[proc->waitLockMode] & MyProc->holdLock)
			{
				/* Yes, report deadlock failure */
				MyProc->errType = STATUS_ERROR;
				goto rt;
			}
			/* I'm waiting for him and he's not waiting for me - go past him */
		}
		/* if he waits for me */
		else if (lockctl->conflictTab[proc->waitLockMode] & MyProc->holdLock)
			break;
		/* if conflicting locks requested */
		else if (lockctl->conflictTab[proc->waitLockMode] & myMask)
		{

			/*
			 * If I am requesting a non-self-conflicting lock and the waiters
			 * just ahead of me are requesting the same lock, stay here.
			 */
			if (!selfConflict && prevSame)
				break;
		}

		/*
		 * Last chance to stop moving up the queue: if we don't conflict
		 * with the remaining waiters in the queue, stay here.
		 */
		else if (!(lockctl->conflictTab[lockmode] & waitMask))
			break;

		prevSame = (proc->waitLockMode == lockmode);
		(aheadHolders[proc->waitLockMode])++;
		if (aheadHolders[proc->waitLockMode] == lock->holders[proc->waitLockMode])
			waitMask &= ~(1 << proc->waitLockMode);
		proc = (PROC *) MAKE_PTR(proc->links.prev);
	}

ins:;
	/* -------------------
	 * Insert self into queue.  These operations are atomic (because
	 * of the spinlock).
	 * -------------------
	 */
	SHMQueueInsertTL(&(proc->links), &(MyProc->links));
	waitQueue->size++;

	lock->waitMask |= myMask;

	MyProc->errType = NO_ERROR;		/* initialize result for success */

	/* mark that we are waiting for a lock */
	waitingForLock = true;

	/* -------------------
	 * Release the locktable's spin lock.
	 *
	 * NOTE: this may also cause us to exit critical-section state,
	 * possibly allowing a cancel/die interrupt to be accepted.
	 * This is OK because we have recorded the fact that we are waiting for
	 * a lock, and so LockWaitCancel will clean up if cancel/die happens.
	 * -------------------
	 */
	SpinRelease(spinlock);

	/* --------------
	 * Set timer so we can wake up after awhile and check for a deadlock.
	 * If a deadlock is detected, the handler releases the process's
	 * semaphore and sets MyProc->errType = STATUS_ERROR, allowing us to
	 * know that we must report failure rather than success.
	 *
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
	 *
	 * The struct must be zeroed so that the interval and microseconds
	 * fields start out at 0.
	 * --------------
	 */
#ifndef __BEOS__
	MemSet(&timeval, 0, sizeof(struct itimerval));
	timeval.it_value.tv_sec = DeadlockTimeout / 1000;
	timeval.it_value.tv_usec = (DeadlockTimeout % 1000) * 1000;
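	/* e.g., DeadlockTimeout = 1000 (msec) gives it_value = { 1 sec, 0 usec } */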
	if (setitimer(ITIMER_REAL, &timeval, &dummy))
		elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
#else
    time_interval = DeadlockTimeout * 1000;	/* convert msec to usec */
	if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0)
		elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
#endif

	/* --------------
	 * If someone wakes us between SpinRelease and IpcSemaphoreLock,
	 * IpcSemaphoreLock will not block.  The wakeup is "saved" by
	 * the semaphore implementation.  Note also that if HandleDeadLock
	 * is invoked but does not detect a deadlock, IpcSemaphoreLock()
	 * will continue to wait.  There used to be a loop here, but it
	 * was useless code...
	 *
	 * We pass interruptOK = true, which eliminates a window in which
	 * cancel/die interrupts would be held off undesirably.  This is a
	 * promise that we don't mind losing control to a cancel/die interrupt
	 * here.  We don't, because we have no state-change work to do after
	 * being granted the lock (the grantor did it all).
	 * --------------
	 */
	IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true);

	/* ---------------
	 * Disable the timer, if it's still running
	 * ---------------
	 */
#ifndef __BEOS__
	MemSet(&timeval, 0, sizeof(struct itimerval));
	if (setitimer(ITIMER_REAL, &timeval, &dummy))
		elog(FATAL, "ProcSleep: Unable to disable timer for process wakeup");
#else
    if (set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM) < 0)
		elog(FATAL, "ProcSleep: Unable to disable timer for process wakeup");
#endif

	/*
	 * Now there is nothing for LockWaitCancel to do.
	 */
	waitingForLock = false;

	/* ----------------
	 * Re-acquire the locktable's spin lock.
	 *
	 * We could accept a cancel/die interrupt here.  That's OK because
	 * the lock is now registered as being held by this process.
	 * ----------------
	 */
	SpinAcquire(spinlock);

rt:;

	MyProc->waitLock = NULL;
	MyProc->waitHolder = NULL;

	return MyProc->errType;
}
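/*
 * A small illustration of the bitmask tests used in the wait-queue scan in
 * ProcSleep above (the mode number is made up, not an actual lock.h value):
 *
 *		myMask   = 1 << lockmode			e.g. lockmode 5  ->  0x20
 *		waitMask = OR of (1 << m) for each mode m some waiter is waiting for
 *
 * "conflictTab[lockmode] & waitMask" is nonzero exactly when some waiter's
 * requested mode conflicts with ours; only then do we look for a position
 * inside the queue instead of inserting at the front.
 */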


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
 *	 Also remove the process from the wait queue and set its links invalid.
 *	 RETURN: the next process in the wait queue.
 */
PROC *
ProcWakeup(PROC *proc, int errType)
{
	PROC	   *retProc;

	/* assume that spinlock has been acquired */

	if (proc->links.prev == INVALID_OFFSET ||
		proc->links.next == INVALID_OFFSET)
		return (PROC *) NULL;

	retProc = (PROC *) MAKE_PTR(proc->links.prev);

	SHMQueueDelete(&(proc->links));
	SHMQueueElemInit(&(proc->links));
	(proc->waitLock->waitProcs.size)--;

	proc->errType = errType;

	IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);

	return retProc;
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
 *		released.
 */
int
ProcLockWakeup(LOCKMETHOD lockmethod, LOCK *lock)
{
	PROC_QUEUE *queue = &(lock->waitProcs);
	PROC	   *proc;
	int			awoken = 0;
	LOCKMODE	last_lockmode = 0;
	int			queue_size = queue->size;

	Assert(queue_size >= 0);

	if (!queue_size)
		return STATUS_NOT_FOUND;

	proc = (PROC *) MAKE_PTR(queue->links.prev);

	while (queue_size-- > 0)
	{
		if (proc->waitLockMode == last_lockmode)
		{
			/*
			 * This proc will conflict as the previous one did, don't even
			 * try.
			 */
			goto nextProc;
		}

		/*
		 * Does this proc conflict with locks held by others ?
		 */
		if (LockResolveConflicts(lockmethod,
								 proc->waitLockMode,
								 lock,
								 proc->waitHolder,
								 proc,
								 NULL) != STATUS_OK)
		{
			/* Yes.  Quit if we already awoke at least one process. */
			if (awoken != 0)
				break;
			/* Otherwise, see if any later waiters can be awoken. */
			last_lockmode = proc->waitLockMode;
			goto nextProc;
		}

		/*
		 * OK to wake up this sleeping process.
		 */
		GrantLock(lock, proc->waitHolder, proc->waitLockMode);
		proc = ProcWakeup(proc, NO_ERROR);
		awoken++;

		/*
		 * ProcWakeup removes proc from the lock's waiting process queue
		 * and returns the next proc in chain; don't use prev link.
		 */
		continue;

nextProc:
		proc = (PROC *) MAKE_PTR(proc->links.prev);
	}

	Assert(queue->size >= 0);

	if (awoken)
		return STATUS_OK;
	else
	{
		/* Something is still blocking us.	May have deadlocked. */
#ifdef LOCK_DEBUG
		if (lock->tag.lockmethod == USER_LOCKMETHOD ? Trace_userlocks : Trace_locks)
		{
			elog(DEBUG, "ProcLockWakeup: lock(%lx) can't wake up any process",
				 MAKE_OFFSET(lock));
			if (Debug_deadlocks)
				DumpAllLocks();
		}
#endif
		return STATUS_NOT_FOUND;
	}
}

void
ProcAddLock(SHM_QUEUE *elem)
{
	SHMQueueInsertTL(&MyProc->lockQueue, elem);
}

/* --------------------
 * We only get to this routine if we got SIGALRM after DeadlockTimeout
 * while waiting for a lock to be released by some other process.  Look
 * to see if there's a deadlock; if not, just return and continue waiting.
 * If we have a real deadlock, remove ourselves from the lock's wait queue
 * and signal an error to ProcSleep.
 * --------------------
 */
void
HandleDeadLock(SIGNAL_ARGS)
{
	int			save_errno = errno;

	/*
	 * Acquire locktable lock.  Note that the SIGALRM interrupt had better
	 * not be enabled anywhere that this process itself holds the locktable
	 * lock, else this will wait forever.  Also note that this calls
	 * SpinAcquire which creates a critical section, so that this routine
	 * cannot be interrupted by cancel/die interrupts.
	 */
	LockLockTable();

	/* ---------------------
	 * Check to see if we've been awoken by anyone in the interim.
	 *
	 * If we have we can return and resume our transaction -- happy day.
	 * Before we are awoken the process releasing the lock grants it to
	 * us so we know that we don't have to wait anymore.
	 *
	 * We check by looking to see if we've been unlinked from the wait queue.
	 * This is quicker than checking our semaphore's state, since no kernel
	 * call is needed, and it is safe because we hold the locktable lock.
	 * ---------------------
	 */
	if (MyProc->links.prev == INVALID_OFFSET ||
		MyProc->links.next == INVALID_OFFSET)
	{
		UnlockLockTable();
		errno = save_errno;
		return;
	}

#ifdef LOCK_DEBUG
    if (Debug_deadlocks)
        DumpAllLocks();
#endif

	if (!DeadLockCheck(MyProc, MyProc->waitLock))
	{
		/* No deadlock, so keep waiting */
		UnlockLockTable();
		errno = save_errno;
		return;
	}

	/* ------------------------
	 * Oops.  We have a deadlock.
	 *
	 * Get this process out of wait state.
	 * ------------------------
	 */
	RemoveFromWaitQueue(MyProc);

	/* -------------
	 * Set MyProc->errType to STATUS_ERROR so that ProcSleep will
	 * report an error after we return from this signal handler.
	 * -------------
	 */
	MyProc->errType = STATUS_ERROR;

	/* ------------------
	 * Unlock my semaphore so that the interrupted ProcSleep() call can finish.
	 * ------------------
	 */
	IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum);

	/* ------------------
	 * We're done here.  Transaction abort caused by the error that ProcSleep
	 * will raise will cause any other locks we hold to be released, thus
	 * allowing other processes to wake up; we don't need to do that here.
	 * NOTE: an exception is that releasing locks we hold doesn't consider
	 * the possibility of waiters that were blocked behind us on the lock
	 * we just failed to get, and might now be wakable because we're not
	 * in front of them anymore.  However, RemoveFromWaitQueue took care of
	 * waking up any such processes.
	 * ------------------
	 */
	UnlockLockTable();
	errno = save_errno;
}

void
ProcReleaseSpins(PROC *proc)
{
	int			i;

	if (!proc)
		proc = MyProc;

	if (!proc)
		return;
	for (i = 0; i < (int) MAX_SPINS; i++)
	{
		if (proc->sLocks[i])
		{
			Assert(proc->sLocks[i] == 1);
			SpinRelease(i);
		}
	}
	AbortBufferIO();
}

/*****************************************************************************
 *
 *****************************************************************************/

/*
 * ProcGetNewSemIdAndNum -
 *	  scan the free semaphore bitmap and allocate a single semaphore from
 *	  a semaphore set.
 */
static void
ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum)
{
	int			i;
	IpcSemaphoreId *procSemIds = ProcGlobal->procSemIds;
	int32	   *freeSemMap = ProcGlobal->freeSemMap;
	int32		fullmask = (1 << PROC_NSEMS_PER_SET) - 1;

	/*
	 * we hold ProcStructLock when entering this routine. We scan through
	 * the bitmap to look for a free semaphore.
	 */

	for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
	{
		int			mask = 1;
		int			j;

		if (freeSemMap[i] == fullmask)
			continue;			/* this set is fully allocated */
		if (procSemIds[i] < 0)
			continue;			/* this set hasn't been initialized */

		for (j = 0; j < PROC_NSEMS_PER_SET; j++)
		{
			if ((freeSemMap[i] & mask) == 0)
			{

				/*
				 * a free semaphore found. Mark it as allocated.
				 */
				freeSemMap[i] |= mask;

				*semId = procSemIds[i];
				*semNum = j;
				return;
			}
			mask <<= 1;
		}
	}

	/* if we reach here, all the semaphores are in use. */
	elog(ERROR, "ProcGetNewSemIdAndNum: cannot allocate a free semaphore");
}
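/*
 * Layout sketch of the free-semaphore map scanned above (widths are
 * symbolic; PROC_NSEMS_PER_SET and PROC_SEM_MAP_ENTRIES come from proc.h):
 *
 *		freeSemMap[i]	one int32 per semaphore set; bit j set means that
 *						semaphore j of the set procSemIds[i] is allocated
 *		fullmask		(1 << PROC_NSEMS_PER_SET) - 1, i.e. every semaphore
 *						in the set is taken, so the set is skipped
 *
 * ProcFreeSem() clears the bit again when the postmaster recycles the slot.
 */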

/*
 * ProcFreeSem -
 *	  free up our semaphore in the semaphore set.
 */
static void
ProcFreeSem(IpcSemaphoreId semId, int semNum)
{
	int32		mask;
	int			i;

	mask = ~(1 << semNum);

	for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
	{
		if (ProcGlobal->procSemIds[i] == semId)
		{
			ProcGlobal->freeSemMap[i] &= mask;
			return;
		}
	}
	fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId);
}

/*
 * ProcFreeAllSemaphores -
 *	  called at shmem_exit time, ie when exiting the postmaster or
 *	  destroying shared state for a failed set of backends.
 *	  Free up all the semaphores allocated to the lmgrs of the backends.
 */
static void
ProcFreeAllSemaphores(void)
{
	int			i;

	for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
	{
		if (ProcGlobal->procSemIds[i] >= 0)
			IpcSemaphoreKill(ProcGlobal->procSemIds[i]);
	}
}