proc.c 24.1 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.62 1999/10/06 21:58:07 vadim Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 *	Each postgres backend gets one of these.  We'll use it to
 *	clean up after the process should the process suddenly die.
 *
 *
 * Interface (a):
 *		ProcSleep(), ProcWakeup(), ProcWakeupNext(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocating memory
 *
 * Locking and waiting for buffers can cause the backend to be
 * put to sleep.  Whoever releases the lock, etc. wakes the
 * process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with this process,
 * ProcKill -- destroys the shared memory state (and locks)
 *		associated with the process.
 *
 * 5/15/91 -- removed the buffer pool based lock chain in favor
 *		of a shared memory lock chain.	The write-protection is
 *		more expensive if the lock chain is in the buffer pool.
 *		The only reason I kept the lock chain in the buffer pool
 *		in the first place was to allow the lock table to grow larger
 *		than available shared memory and that isn't going to work
 *		without a lot of unimplemented support anyway.
 *
 * 4/7/95 -- instead of allocating a set of 1 semaphore per process, we
 *		allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores
 *		shared among backends (we keep a few sets of semaphores around).
 *		This is so that we can support more backends. (system-wide semaphore
 *		sets run out pretty fast.)				  -ay 4/95
 *
 * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.62 1999/10/06 21:58:07 vadim Exp $
 */
#include <sys/time.h>
#include <unistd.h>
53
#include <signal.h>
54
#include <sys/types.h>
M
Marc G. Fournier 已提交
55

B
Bruce Momjian 已提交
56
#if defined(solaris_sparc)
57 58 59 60
#include <sys/ipc.h>
#include <sys/sem.h>
#endif

M
Marc G. Fournier 已提交
61
#include "postgres.h"
62
#include "miscadmin.h"
63
#include "libpq/pqsignal.h"
64 65


66 67
/* In Ultrix, sem.h must be included after ipc.h */
#include <sys/sem.h>
B
Bruce Momjian 已提交
68

B
Bruce Momjian 已提交
69
#include "storage/lmgr.h"
70
#include "storage/proc.h"
M
 
Marc G. Fournier 已提交
71
#include "utils/trace.h"
72

73
void HandleDeadLock(SIGNAL_ARGS);
74
static void ProcFreeAllSemaphores(void);
75

M
 
Marc G. Fournier 已提交
76 77
#define DeadlockCheckTimer pg_options[OPT_DEADLOCKTIMEOUT]

78 79 80 81 82 83 84
/* --------------------
 * Spin lock for manipulating the shared process data structure:
 * ProcGlobal.... Adding an extra spin lock seemed like the smallest
 * hack to get around reading and updating this structure in shared
 * memory. -mer 17 July 1991
 * --------------------
 */
85
SPINLOCK	ProcStructLock;
86 87 88

static PROC_HDR *ProcGlobal = NULL;

89
PROC	   *MyProc = NULL;
90

91
static void ProcKill(int exitStatus, int pid);
92
static void ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum);
93
static void ProcFreeSem(IpcSemaphoreKey semKey, int semNum);
94

V
Vadim B. Mikheev 已提交
95 96
static char *DeadLockMessage = "Deadlock detected -- See the lock(l) manual page for a possible cause.";

97 98
/*
 * InitProcGlobal -
99
 *	  initializes the global process table. We put it here so that
100
 *	  the postmaster can do this initialization. (ProcFreeAllSemaphores needs
101 102 103
 *	  to read this table on exiting the postmaster. If we have the first
 *	  backend do this, starting up and killing the postmaster without
 *	  starting any backends will be a problem.)
104 105 106 107 108 109 110 111 112 113 114
 *
 *	  We also allocate all the per-process semaphores we will need to support
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxBackends higher than his kernel will support, he'll find out sooner
 *	  rather than later.
115 116
 */
void
117
InitProcGlobal(IPCKey key, int maxBackends)
118
{
119
	bool		found = false;
120

121 122 123
	/* attach to the free list */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", (unsigned) sizeof(PROC_HDR), &found);
124

125 126
	/* --------------------
	 * We're the first - initialize.
127 128
	 * XXX if found should ever be true, it is a sign of impending doom ...
	 * ought to complain if so?
129 130 131
	 * --------------------
	 */
	if (!found)
132
	{
133
		int			i;
134

135 136 137 138
		ProcGlobal->freeProcs = INVALID_OFFSET;
		ProcGlobal->currKey = IPCGetProcessSemaphoreInitKey(key);
		for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++)
			ProcGlobal->freeSemMap[i] = 0;
139

B
Bruce Momjian 已提交
140 141 142
		/*
		 * Arrange to delete semas on exit --- set this up now so that we
		 * will clean up if pre-allocation fails...
143 144 145
		 */
		on_shmem_exit(ProcFreeAllSemaphores, NULL);

B
Bruce Momjian 已提交
146 147
		/*
		 * Pre-create the semaphores for the first maxBackends processes,
148 149 150
		 * unless we are running as a standalone backend.
		 */
		if (key != PrivateIPCKey)
151
		{
152
			for (i = 0;
B
Bruce Momjian 已提交
153
				 i < (maxBackends + PROC_NSEMS_PER_SET - 1) / PROC_NSEMS_PER_SET;
154 155 156 157 158 159 160 161 162 163
				 i++)
			{
				IPCKey		semKey = ProcGlobal->currKey + i;
				int			semId;
				int			semstat;

				semId = IpcSemaphoreCreate(semKey,
										   PROC_NSEMS_PER_SET,
										   IPCProtection,
										   IpcSemaphoreDefaultStartValue,
164 165 166
										   0);
				if (semId < 0)
					elog(FATAL, "InitProcGlobal: IpcSemaphoreCreate failed");
167 168 169
				/* mark this sema set allocated */
				ProcGlobal->freeSemMap[i] = (1 << PROC_NSEMS_PER_SET);
			}
170
		}
171 172 173 174 175 176 177 178 179 180 181
	}
}

/* ------------------------
 * InitProcess -- create a per-process data structure for this process
 * used by the lock manager on semaphore queues.
 *
 * Takes a PROC from the shared free list (or allocates a new one), and,
 * when running under the postmaster, claims one semaphore for it, resets
 * that semaphore and locks it once.  Finally registers the PROC in the
 * shmem PID table and arranges for ProcKill() to run at shmem exit.
 *
 * The "key" argument is not used by this function.
 *
 * Errors: STOP if the proc header is missing or the PID table is
 * inconsistent, ERROR if this process already has a PROC, FATAL if shared
 * memory or the semaphore set cannot be obtained.
 * ------------------------
 */
void
InitProcess(IPCKey key)
{
	bool		found = false;
	unsigned long location,
				myOffset;

	SpinAcquire(ProcStructLock);

	/* attach to the free list */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", (unsigned) sizeof(PROC_HDR), &found);
	if (!found)
	{
		/* this should not happen. InitProcGlobal() is called before this. */
		elog(STOP, "InitProcess: Proc Header uninitialized");
	}

	if (MyProc != NULL)
	{
		SpinRelease(ProcStructLock);
		elog(ERROR, "ProcInit: you already exist");
		return;
	}

	/* try to get a proc from the free list first */

	myOffset = ProcGlobal->freeProcs;

	if (myOffset != INVALID_OFFSET)
	{
		MyProc = (PROC *) MAKE_PTR(myOffset);
		ProcGlobal->freeProcs = MyProc->links.next;
	}
	else
	{

		/*
		 * have to allocate one.  We can't use the normal shmem index
		 * table mechanism because the proc structure is stored by PID
		 * instead of by a global name (need to look it up by PID when we
		 * cleanup dead processes).
		 */

		MyProc = (PROC *) ShmemAlloc((unsigned) sizeof(PROC));
		if (!MyProc)
		{
			SpinRelease(ProcStructLock);
			elog(FATAL, "cannot create new proc: out of memory");
		}

		/* this cannot be initialized until after the buffer pool */
		SHMQueueInit(&(MyProc->lockQueue));
	}

	/*
	 * zero out the spin lock counts and set the sLocks field for
	 * ProcStructLock to 1 as we have acquired this spinlock above but
	 * didn't record it since we didn't have MyProc until now.
	 */
	MemSet(MyProc->sLocks, 0, sizeof(MyProc->sLocks));
	MyProc->sLocks[ProcStructLock] = 1;


	if (IsUnderPostmaster)
	{
		IPCKey		semKey;
		int			semNum;
		int			semId;
		union semun semun;

		ProcGetNewSemKeyAndNum(&semKey, &semNum);

		/*
		 * Note: because of the pre-allocation done in InitProcGlobal,
		 * this call should always attach to an existing semaphore. It
		 * will (try to) create a new group of semaphores only if the
		 * postmaster tries to start more backends than it said it would.
		 */
		semId = IpcSemaphoreCreate(semKey,
								   PROC_NSEMS_PER_SET,
								   IPCProtection,
								   IpcSemaphoreDefaultStartValue,
								   0);

		/*
		 * Same failure test as InitProcGlobal.  Without it a negative id
		 * would be handed to semctl() and IpcSemaphoreLock() below.  Drop
		 * the spinlock first, as the out-of-memory path above does.
		 */
		if (semId < 0)
		{
			SpinRelease(ProcStructLock);
			elog(FATAL, "InitProcess: IpcSemaphoreCreate failed");
		}

		/*
		 * we might be reusing a semaphore that belongs to a dead backend.
		 * So be careful and reinitialize its value here.
		 */
		semun.val = IpcSemaphoreDefaultStartValue;
		semctl(semId, semNum, SETVAL, semun);

		IpcSemaphoreLock(semId, semNum, IpcExclusiveLock);
		MyProc->sem.semId = semId;
		MyProc->sem.semNum = semNum;
		MyProc->sem.semKey = semKey;
	}
	else
		MyProc->sem.semId = -1;

	/* ----------------------
	 * Release the lock.
	 * ----------------------
	 */
	SpinRelease(ProcStructLock);

	MyProc->pid = MyProcPid;
	MyProc->databaseId = MyDatabaseId;
	MyProc->xid = InvalidTransactionId;
	MyProc->xmin = InvalidTransactionId;

	/* ----------------
	 * Start keeping spin lock stats from here on.	Any botch before
	 * this initialization is forever botched
	 * ----------------
	 */
	MemSet(MyProc->sLocks, 0, MAX_SPINS * sizeof(*MyProc->sLocks));

	/* -------------------------
	 * Install ourselves in the shmem index table.	The name to
	 * use is determined by the OS-assigned process id.  That
	 * allows the cleanup process to find us after any untimely
	 * exit.
	 * -------------------------
	 */
	location = MAKE_OFFSET(MyProc);
	if ((!ShmemPIDLookup(MyProcPid, &location)) || (location != MAKE_OFFSET(MyProc)))
		elog(STOP, "InitProc: ShmemPID table broken");

	MyProc->errType = NO_ERROR;
	SHMQueueElemInit(&(MyProc->links));

	on_shmem_exit(ProcKill, (caddr_t) MyProcPid);
}

/*
 * ProcReleaseLocks() -- release all locks associated with this process
 *
 */
void
ProcReleaseLocks()
{
322 323 324
	if (!MyProc)
		return;
	LockReleaseAll(1, &MyProc->lockQueue);
325 326 327 328
}

/*
 * ProcRemove -
329 330 331 332 333
 *	  used by the postmaster to clean up the global tables. This also frees
 *	  up the semaphore used for the lmgr of the process. (We have to do
 *	  this is the postmaster instead of doing a IpcSemaphoreKill on exiting
 *	  the process because the semaphore set is shared among backends and
 *	  we don't want to remove other's semaphores on exit.)
334 335 336 337
 */
bool
ProcRemove(int pid)
{
338 339
	SHMEM_OFFSET location;
	PROC	   *proc;
340 341 342 343 344

	location = INVALID_OFFSET;

	location = ShmemPIDDestroy(pid);
	if (location == INVALID_OFFSET)
345
		return FALSE;
346 347 348 349 350 351 352 353 354 355 356
	proc = (PROC *) MAKE_PTR(location);

	SpinAcquire(ProcStructLock);

	ProcFreeSem(proc->sem.semKey, proc->sem.semNum);

	proc->links.next = ProcGlobal->freeProcs;
	ProcGlobal->freeProcs = MAKE_OFFSET(proc);

	SpinRelease(ProcStructLock);

357
	return TRUE;
358 359 360 361
}

/*
 * ProcKill() -- Destroy the per-proc data structure for
362
 *		this process. Release any of its held spin locks.
363 364 365 366
 */
static void
ProcKill(int exitStatus, int pid)
{
367 368
	PROC	   *proc;
	SHMEM_OFFSET location;
369 370 371 372 373 374 375 376 377 378

	/* --------------------
	 * If this is a FATAL exit the postmaster will have to kill all the
	 * existing backends and reinitialize shared memory.  So all we don't
	 * need to do anything here.
	 * --------------------
	 */
	if (exitStatus != 0)
		return;

B
Bruce Momjian 已提交
379
	ShmemPIDLookup(MyProcPid, &location);
380 381 382 383 384
	if (location == INVALID_OFFSET)
		return;

	proc = (PROC *) MAKE_PTR(location);

385 386 387
	Assert(proc == MyProc || pid != MyProcPid);

	MyProc = NULL;
388 389 390 391 392 393

	/* ---------------
	 * Assume one lock table.
	 * ---------------
	 */
	ProcReleaseSpins(proc);
M
 
Marc G. Fournier 已提交
394
	LockReleaseAll(DEFAULT_LOCKMETHOD, &proc->lockQueue);
395

396
#ifdef USER_LOCKS
397

M
 
Marc G. Fournier 已提交
398 399 400 401
	/*
	 * Assume we have a second lock table.
	 */
	LockReleaseAll(USER_LOCKMETHOD, &proc->lockQueue);
402 403
#endif

404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
	/* ----------------
	 * get off the wait queue
	 * ----------------
	 */
	LockLockTable();
	if (proc->links.next != INVALID_OFFSET)
	{
		Assert(proc->waitLock->waitProcs.size > 0);
		SHMQueueDelete(&(proc->links));
		--proc->waitLock->waitProcs.size;
	}
	SHMQueueElemInit(&(proc->links));
	UnlockLockTable();

	return;
419 420 421 422
}

/*
 * ProcQueue package: routines for putting processes to sleep
423
 *		and  waking them up
424 425 426 427 428 429 430 431
 */

/*
 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
 *
 * Returns: a pointer to the queue or NULL
 * Side Effects: Initializes the queue if we allocated one
 *
 * Compiled only when NOT_USED is defined.
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(char *name)
{
	bool		alreadyExists;
	PROC_QUEUE *result;

	result = (PROC_QUEUE *)
		ShmemInitStruct(name, (unsigned) sizeof(PROC_QUEUE), &alreadyExists);

	/* only a queue we just created needs initializing */
	if (result != NULL && !alreadyExists)
		ProcQueueInit(result);

	return result;
}

#endif
448 449 450 451 452

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
453
ProcQueueInit(PROC_QUEUE *queue)
454
{
455 456
	SHMQueueInit(&(queue->links));
	queue->size = 0;
457 458 459 460 461 462 463 464 465 466 467 468
}



/*
 * ProcSleep -- put a process to sleep
 *
 * P() on the semaphore should put us to sleep.  The process
 * semaphore is cleared by default, so the first time we try
 * to acquire it, we sleep.
 *
 * ASSUME: that no one will fiddle with the queue until after
469
 *		we release the spin lock.
470 471 472 473
 *
 * NOTES: The process queue is now a priority queue for locking.
 */
int
474
ProcSleep(PROC_QUEUE *waitQueue,/* lock->waitProcs */
475
		  LOCKMETHODCTL *lockctl,
476
		  int token,			/* lockmode */
V
Vadim B. Mikheev 已提交
477
		  LOCK *lock)
478
{
479
	int			i;
V
Vadim B. Mikheev 已提交
480
	SPINLOCK	spinlock = lockctl->masterLock;
481
	PROC	   *proc;
V
Vadim B. Mikheev 已提交
482 483 484 485 486
	int			myMask = (1 << token);
	int			waitMask = lock->waitMask;
	int			aheadHolders[MAX_LOCKMODES];
	bool		selfConflict = (lockctl->conflictTab[token] & myMask),
				prevSame = false;
B
Bruce Momjian 已提交
487 488 489
	bool		deadlock_checked = false;
	struct itimerval timeval,
				dummy;
490

V
Vadim B. Mikheev 已提交
491 492 493
	MyProc->token = token;
	MyProc->waitLock = lock;

B
Bruce Momjian 已提交
494
	proc = (PROC *) MAKE_PTR(waitQueue->links.prev);
495

V
Vadim B. Mikheev 已提交
496 497 498
	/* if we don't conflict with any waiter - be first in queue */
	if (!(lockctl->conflictTab[token] & waitMask))
		goto ins;
499

V
Vadim B. Mikheev 已提交
500 501 502
	for (i = 1; i < MAX_LOCKMODES; i++)
		aheadHolders[i] = lock->activeHolders[i];
	(aheadHolders[token])++;
503

V
Vadim B. Mikheev 已提交
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523
	for (i = 0; i < waitQueue->size; i++)
	{
		/* am I waiting for him ? */
		if (lockctl->conflictTab[token] & proc->holdLock)
		{
			/* is he waiting for me ? */
			if (lockctl->conflictTab[proc->token] & MyProc->holdLock)
			{
				MyProc->errType = STATUS_ERROR;
				elog(NOTICE, DeadLockMessage);
				goto rt;
			}
			/* being waiting for him - go past */
		}
		/* if he waits for me */
		else if (lockctl->conflictTab[proc->token] & MyProc->holdLock)
			break;
		/* if conflicting locks requested */
		else if (lockctl->conflictTab[proc->token] & myMask)
		{
B
Bruce Momjian 已提交
524

V
Vadim B. Mikheev 已提交
525
			/*
B
Bruce Momjian 已提交
526 527
			 * If I request non self-conflicting lock and there are others
			 * requesting the same lock just before me - stay here.
V
Vadim B. Mikheev 已提交
528 529 530 531
			 */
			if (!selfConflict && prevSame)
				break;
		}
B
Bruce Momjian 已提交
532

V
Vadim B. Mikheev 已提交
533
		/*
B
Bruce Momjian 已提交
534 535
		 * Last attempt to don't move any more: if we don't conflict with
		 * rest waiters in queue.
V
Vadim B. Mikheev 已提交
536 537 538
		 */
		else if (!(lockctl->conflictTab[token] & waitMask))
			break;
539

V
Vadim B. Mikheev 已提交
540 541 542
		prevSame = (proc->token == token);
		(aheadHolders[proc->token])++;
		if (aheadHolders[proc->token] == lock->holders[proc->token])
B
Bruce Momjian 已提交
543
			waitMask &= ~(1 << proc->token);
V
Vadim B. Mikheev 已提交
544 545
		proc = (PROC *) MAKE_PTR(proc->links.prev);
	}
546

V
Vadim B. Mikheev 已提交
547
ins:;
548 549 550 551 552 553
	/* -------------------
	 * assume that these two operations are atomic (because
	 * of the spinlock).
	 * -------------------
	 */
	SHMQueueInsertTL(&(proc->links), &(MyProc->links));
B
Bruce Momjian 已提交
554
	waitQueue->size++;
555

V
Vadim B. Mikheev 已提交
556
	lock->waitMask |= myMask;
557 558 559
	SpinRelease(spinlock);

	/* --------------
B
Bruce Momjian 已提交
560
	 * We set this so we can wake up periodically and check for a deadlock.
B
Bruce Momjian 已提交
561 562
	 * If a deadlock is detected, the handler releases the processes
	 * semaphore and aborts the current transaction.
B
Bruce Momjian 已提交
563 564 565
	 *
	 * Need to zero out struct to set the interval and the micro seconds fields
	 * to 0.
566 567
	 * --------------
	 */
B
Bruce Momjian 已提交
568 569 570
	MemSet(&timeval, 0, sizeof(struct itimerval));
	timeval.it_value.tv_sec = \
		(DeadlockCheckTimer ? DeadlockCheckTimer : DEADLOCK_CHECK_TIMER);
571

B
Bruce Momjian 已提交
572 573
	do
	{
574
		MyProc->errType = NO_ERROR;		/* reset flag after deadlock check */
575

B
Bruce Momjian 已提交
576 577 578 579 580
		if (!deadlock_checked)
			if (setitimer(ITIMER_REAL, &timeval, &dummy))
				elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
		deadlock_checked = true;

B
Bruce Momjian 已提交
581 582 583 584 585 586
		/* --------------
		 * if someone wakes us between SpinRelease and IpcSemaphoreLock,
		 * IpcSemaphoreLock will not block.  The wakeup is "saved" by
		 * the semaphore implementation.
		 * --------------
		 */
M
 
Marc G. Fournier 已提交
587 588
		IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum,
						 IpcExclusiveLock);
589 590 591
	} while (MyProc->errType == STATUS_NOT_FOUND);		/* sleep after deadlock
														 * check */

B
Bruce Momjian 已提交
592 593 594 595 596 597 598 599
	/* ---------------
	 * We were awoken before a timeout - now disable the timer
	 * ---------------
	 */
	timeval.it_value.tv_sec = 0;
	if (setitimer(ITIMER_REAL, &timeval, &dummy))
		elog(FATAL, "ProcSleep: Unable to diable timer for process wakeup");

600 601 602 603 604 605 606
	/* ----------------
	 * We were assumed to be in a critical section when we went
	 * to sleep.
	 * ----------------
	 */
	SpinAcquire(spinlock);

V
Vadim B. Mikheev 已提交
607 608
rt:;

M
 
Marc G. Fournier 已提交
609 610
#ifdef LOCK_MGR_DEBUG
	/* Just to get meaningful debug messages from DumpLocks() */
611
	MyProc->waitLock = (LOCK *) NULL;
M
 
Marc G. Fournier 已提交
612 613
#endif

614
	return MyProc->errType;
615 616 617 618 619 620
}


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
621 622
 *	 remove the process from the wait queue and set its links invalid.
 *	 RETURN: the next process in the wait queue.
623
 */
B
Bruce Momjian 已提交
624
PROC *
625
ProcWakeup(PROC *proc, int errType)
626
{
627
	PROC	   *retProc;
628 629 630 631 632

	/* assume that spinlock has been acquired */

	if (proc->links.prev == INVALID_OFFSET ||
		proc->links.next == INVALID_OFFSET)
633
		return (PROC *) NULL;
634 635 636 637 638 639 640 641 642 643 644 645

	retProc = (PROC *) MAKE_PTR(proc->links.prev);

	/* you have to update waitLock->waitProcs.size yourself */
	SHMQueueDelete(&(proc->links));
	SHMQueueElemInit(&(proc->links));

	proc->errType = errType;

	IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum, IpcExclusiveLock);

	return retProc;
646 647 648 649
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
650
 *		released.
651 652
 */
int
653
ProcLockWakeup(PROC_QUEUE *queue, LOCKMETHOD lockmethod, LOCK *lock)
654
{
655
	PROC	   *proc;
V
Vadim B. Mikheev 已提交
656
	int			count = 0;
M
 
Marc G. Fournier 已提交
657
	int			trace_flag;
V
Vadim B. Mikheev 已提交
658
	int			last_locktype = 0;
M
 
Marc G. Fournier 已提交
659 660 661
	int			queue_size = queue->size;

	Assert(queue->size >= 0);
662 663

	if (!queue->size)
664
		return STATUS_NOT_FOUND;
665 666

	proc = (PROC *) MAKE_PTR(queue->links.prev);
M
 
Marc G. Fournier 已提交
667 668
	while ((queue_size--) && (proc))
	{
669

M
 
Marc G. Fournier 已提交
670
		/*
671 672
		 * This proc will conflict as the previous one did, don't even
		 * try.
M
 
Marc G. Fournier 已提交
673 674 675 676 677
		 */
		if (proc->token == last_locktype)
			continue;

		/*
V
Vadim B. Mikheev 已提交
678
		 * Does this proc conflict with locks held by others ?
M
 
Marc G. Fournier 已提交
679 680
		 */
		if (LockResolveConflicts(lockmethod,
681
								 lock,
682
								 proc->token,
M
 
Marc G. Fournier 已提交
683 684 685
								 proc->xid,
								 (XIDLookupEnt *) NULL) != STATUS_OK)
		{
V
Vadim B. Mikheev 已提交
686 687
			if (count != 0)
				break;
M
 
Marc G. Fournier 已提交
688 689 690
			last_locktype = proc->token;
			continue;
		}
691 692 693 694 695 696 697

		/*
		 * there was a waiting process, grant it the lock before waking it
		 * up.	This will prevent another process from seizing the lock
		 * between the time we release the lock master (spinlock) and the
		 * time that the awoken process begins executing again.
		 */
698
		GrantLock(lock, proc->token);
699 700 701

		/*
		 * ProcWakeup removes proc from the lock waiting process queue and
702
		 * returns the next proc in chain.
703 704 705
		 */

		count++;
M
 
Marc G. Fournier 已提交
706 707
		queue->size--;
		proc = ProcWakeup(proc, NO_ERROR);
708
	}
709

M
 
Marc G. Fournier 已提交
710 711
	Assert(queue->size >= 0);

712
	if (count)
713
		return STATUS_OK;
714 715
	else
	{
716
		/* Something is still blocking us.	May have deadlocked. */
M
 
Marc G. Fournier 已提交
717 718 719 720 721 722 723 724 725
		trace_flag = (lock->tag.lockmethod == USER_LOCKMETHOD) ? \
			TRACE_USERLOCKS : TRACE_LOCKS;
		TPRINTF(trace_flag,
				"ProcLockWakeup: lock(%x) can't wake up any process",
				MAKE_OFFSET(lock));
#ifdef DEADLOCK_DEBUG
		if (pg_options[trace_flag] >= 2)
			DumpAllLocks();
#endif
726
		return STATUS_NOT_FOUND;
M
 
Marc G. Fournier 已提交
727
	}
728 729 730
}

void
731
ProcAddLock(SHM_QUEUE *elem)
732
{
733
	SHMQueueInsertTL(&MyProc->lockQueue, elem);
734 735 736
}

/* --------------------
B
Bruce Momjian 已提交
737 738 739
 * We only get to this routine if we got SIGALRM after DEADLOCK_CHECK_TIMER
 * while waiting for a lock to be released by some other process.  If we have
 * a real deadlock, we must also indicate that I'm no longer waiting
740
 * on a lock so that other processes don't try to wake me up and screw
741 742 743
 * up my semaphore.
 * --------------------
 */
744 745
void
HandleDeadLock(SIGNAL_ARGS)
746
{
B
Bruce Momjian 已提交
747
	LOCK	   *mywaitlock;
748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785

	LockLockTable();

	/* ---------------------
	 * Check to see if we've been awoken by anyone in the interim.
	 *
	 * If we have we can return and resume our transaction -- happy day.
	 * Before we are awoken the process releasing the lock grants it to
	 * us so we know that we don't have to wait anymore.
	 *
	 * Damn these names are LONG! -mer
	 * ---------------------
	 */
	if (IpcSemaphoreGetCount(MyProc->sem.semId, MyProc->sem.semNum) ==
		IpcSemaphoreDefaultStartValue)
	{
		UnlockLockTable();
		return;
	}

	/*
	 * you would think this would be unnecessary, but...
	 *
	 * this also means we've been removed already.  in some ports (e.g.,
	 * sparc and aix) the semop(2) implementation is such that we can
	 * actually end up in this handler after someone has removed us from
	 * the queue and bopped the semaphore *but the test above fails to
	 * detect the semaphore update* (presumably something weird having to
	 * do with the order in which the semaphore wakeup signal and SIGALRM
	 * get handled).
	 */
	if (MyProc->links.prev == INVALID_OFFSET ||
		MyProc->links.next == INVALID_OFFSET)
	{
		UnlockLockTable();
		return;
	}

786
#ifdef DEADLOCK_DEBUG
M
 
Marc G. Fournier 已提交
787
	DumpAllLocks();
788 789
#endif

B
Bruce Momjian 已提交
790 791
	MyProc->errType = STATUS_NOT_FOUND;
	if (!DeadLockCheck(MyProc, MyProc->waitLock))
B
Bruce Momjian 已提交
792 793 794 795 796 797 798
	{
		UnlockLockTable();
		return;
	}

	mywaitlock = MyProc->waitLock;

799 800 801 802
	/* ------------------------
	 * Get this process off the lock's wait queue
	 * ------------------------
	 */
B
Bruce Momjian 已提交
803 804
	Assert(mywaitlock->waitProcs.size > 0);
	--mywaitlock->waitProcs.size;
805 806 807 808 809 810 811 812
	SHMQueueDelete(&(MyProc->links));
	SHMQueueElemInit(&(MyProc->links));

	/* ------------------
	 * Unlock my semaphore so that the count is right for next time.
	 * I was awoken by a signal, not by someone unlocking my semaphore.
	 * ------------------
	 */
M
 
Marc G. Fournier 已提交
813 814
	IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum,
					   IpcExclusiveLock);
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829

	/* -------------
	 * Set MyProc->errType to STATUS_ERROR so that we abort after
	 * returning from this handler.
	 * -------------
	 */
	MyProc->errType = STATUS_ERROR;

	/*
	 * if this doesn't follow the IpcSemaphoreUnlock then we get lock
	 * table corruption ("LockReplace: xid table corrupted") due to race
	 * conditions.	i don't claim to understand this...
	 */
	UnlockLockTable();

V
Vadim B. Mikheev 已提交
830
	elog(NOTICE, DeadLockMessage);
831
	return;
832 833 834
}

void
835
ProcReleaseSpins(PROC *proc)
836
{
837
	int			i;
838 839 840 841 842 843 844

	if (!proc)
		proc = MyProc;

	if (!proc)
		return;
	for (i = 0; i < (int) MAX_SPINS; i++)
845
	{
846
		if (proc->sLocks[i])
847
		{
848 849
			Assert(proc->sLocks[i] == 1);
			SpinRelease(i);
850 851 852 853 854
		}
	}
}

/*****************************************************************************
855
 *
856 857 858 859
 *****************************************************************************/

/*
 * ProcGetNewSemKeyAndNum -
860 861 862 863
 *	  scan the free semaphore bitmap and allocate a single semaphore from
 *	  a semaphore set. (If the semaphore set doesn't exist yet,
 *	  IpcSemaphoreCreate will create it. Otherwise, we use the existing
 *	  semaphore set.)
864 865
 */
static void
866
ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum)
867
{
868 869
	int			i;
	int32	   *freeSemMap = ProcGlobal->freeSemMap;
B
Bruce Momjian 已提交
870
	int32		fullmask = (1 << (PROC_NSEMS_PER_SET + 1)) - 1;
871

872 873 874 875
	/*
	 * we hold ProcStructLock when entering this routine. We scan through
	 * the bitmap to look for a free semaphore.
	 */
876

877 878
	for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++)
	{
879 880
		int			mask = 1;
		int			j;
881 882

		if (freeSemMap[i] == fullmask)
883
			continue;			/* this set is fully allocated */
884 885 886 887 888 889 890

		for (j = 0; j < PROC_NSEMS_PER_SET; j++)
		{
			if ((freeSemMap[i] & mask) == 0)
			{

				/*
B
Bruce Momjian 已提交
891 892
				 * a free semaphore found. Mark it as allocated. Also set
				 * the bit indicating whole set is allocated.
893
				 */
894
				freeSemMap[i] |= mask + (1 << PROC_NSEMS_PER_SET);
895 896 897 898 899 900 901

				*key = ProcGlobal->currKey + i;
				*semNum = j;
				return;
			}
			mask <<= 1;
		}
902 903
	}

904
	/* if we reach here, all the semaphores are in use. */
905
	elog(ERROR, "InitProc: cannot allocate a free semaphore");
906 907 908 909
}

/*
 * ProcFreeSem -
 *	  free up our semaphore in the semaphore set.
 *
 *	  semKey selects the set (as an offset from ProcGlobal->currKey) and
 *	  semNum the bit to clear within that set's bitmap entry.
 */
static void
ProcFreeSem(IpcSemaphoreKey semKey, int semNum)
{
	int			setIdx = semKey - ProcGlobal->currKey;

	ProcGlobal->freeSemMap[setIdx] &= ~(1 << semNum);

	/*
	 * Formerly we'd release a semaphore set if it was now completely
	 * unused, but now we keep the semaphores to ensure we won't run out
	 * when starting new backends --- cf. InitProcGlobal.  Note that the
	 * PROC_NSEMS_PER_SET+1'st bit of the freeSemMap entry remains set to
	 * indicate it is still allocated; ProcFreeAllSemaphores() needs that.
	 */
}

/*
 * ProcFreeAllSemaphores -
934 935 936
 *	  called at shmem_exit time, ie when exiting the postmaster or
 *	  destroying shared state for a failed set of backends.
 *	  Free up all the semaphores allocated to the lmgrs of the backends.
937
 */
938
static void
939 940
ProcFreeAllSemaphores()
{
941 942
	int			i;
	int32	   *freeSemMap = ProcGlobal->freeSemMap;
943

944 945 946 947 948
	for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++)
	{
		if (freeSemMap[i] != 0)
			IpcSemaphoreKill(ProcGlobal->currKey + i);
	}
949
}