/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.94 2001/01/16 20:59:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 *	Each postgres backend gets one of these.  We'll use it to
 *	clean up after the process should the process suddenly die.
 *
 *
 * Interface (a):
 *		ProcSleep(), ProcWakeup(), ProcWakeupNext(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocing memory
 *
 * Locking and waiting for buffers can cause the backend to be
 * put to sleep.  Whoever releases the lock, etc. wakes the
 * process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with current transaction
 *
 * ProcKill -- destroys the shared memory state (and locks)
 *		associated with the process.
 *
 * 5/15/91 -- removed the buffer pool based lock chain in favor
 *		of a shared memory lock chain.	The write-protection is
 *		more expensive if the lock chain is in the buffer pool.
 *		The only reason I kept the lock chain in the buffer pool
 *		in the first place was to allow the lock table to grow larger
 *		than available shared memory and that isn't going to work
 *		without a lot of unimplemented support anyway.
 *
 * 4/7/95 -- instead of allocating a set of 1 semaphore per process, we
 *		allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores
 *		shared among backends (we keep a few sets of semaphores around).
 *		This is so that we can support more backends. (system-wide semaphore
 *		sets run out pretty fast.)				  -ay 4/95
 *
 * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.94 2001/01/16 20:59:34 tgl Exp $
 */
#include "postgres.h"

#include <errno.h>
#include <sys/time.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>

#if defined(solaris_sparc) || defined(__CYGWIN__)
#include <sys/ipc.h>
#include <sys/sem.h>
#endif

#include "miscadmin.h"

#if defined(__darwin__)
#include "port/darwin/sem.h"
#endif

/* In Ultrix and QNX, sem.h must be included after ipc.h */
#ifdef HAVE_SYS_SEM_H
#include <sys/sem.h>
#endif

#include "access/xact.h"
#include "storage/proc.h"


int DeadlockTimeout = 1000;

/* --------------------
 * Spin lock for manipulating the shared process data structure:
 * ProcGlobal.... Adding an extra spin lock seemed like the smallest
 * hack to get around reading and updating this structure in shared
 * memory. -mer 17 July 1991
 * --------------------
 */
SPINLOCK	ProcStructLock;

static PROC_HDR *ProcGlobal = NULL;

PROC	   *MyProc = NULL;

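/* Set while ProcSleep is blocked waiting for a lock, so LockWaitCancel knows to clean up */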
static bool waitingForLock = false;

static void ProcKill(void);
static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum);
static void ProcFreeSem(IpcSemaphoreId semId, int semNum);
static void ZeroProcSemaphore(PROC *proc);
static void ProcFreeAllSemaphores(void);


/*
 * InitProcGlobal -
 *	  initializes the global process table. We put it here so that
 *	  the postmaster can do this initialization. (ProcFreeAllSemaphores needs
 *	  to read this table on exiting the postmaster. If we have the first
 *	  backend do this, starting up and killing the postmaster without
 *	  starting any backends will be a problem.)
 *
 *	  We also allocate all the per-process semaphores we will need to support
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxBackends higher than his kernel will support, he'll find out sooner
 *	  rather than later.
 */
void
InitProcGlobal(int maxBackends)
{
	bool		found = false;

	/* attach to the free list */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);

	/* --------------------
	 * We're the first - initialize.
	 * XXX if found should ever be true, it is a sign of impending doom ...
	 * ought to complain if so?
	 * --------------------
	 */
	if (!found)
	{
		int			i;

		ProcGlobal->freeProcs = INVALID_OFFSET;
		for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
		{
			ProcGlobal->procSemIds[i] = -1;
			ProcGlobal->freeSemMap[i] = 0;
		}

		/*
		 * Arrange to delete semas on exit --- set this up now so that we
		 * will clean up if pre-allocation fails.  We use our own freeproc,
		 * rather than IpcSemaphoreCreate's removeOnExit option, because
		 * we don't want to fill up the on_shmem_exit list with a separate
		 * entry for each semaphore set.
		 */
		on_shmem_exit(ProcFreeAllSemaphores, 0);

		/*
		 * Pre-create the semaphores for the first maxBackends processes.
		 */
		Assert(maxBackends > 0 && maxBackends <= MAXBACKENDS);

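		/*
		 * Round up: we need enough semaphore sets to cover maxBackends
		 * semaphores, i.e. ceil(maxBackends / PROC_NSEMS_PER_SET); e.g.
		 * with 16 semaphores per set, 33 backends need 3 sets.
		 */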
		for (i = 0; i < ((maxBackends-1)/PROC_NSEMS_PER_SET+1); i++)
		{
			IpcSemaphoreId		semId;

			semId = IpcSemaphoreCreate(PROC_NSEMS_PER_SET,
									   IPCProtection,
									   1,
									   false);
			ProcGlobal->procSemIds[i] = semId;
		}
	}
}

/* ------------------------
 * InitProcess -- create a per-process data structure for this process
 * used by the lock manager on semaphore queues.
 * ------------------------
 */
void
InitProcess(void)
{
	bool		found = false;
	unsigned long location,
				myOffset;

	SpinAcquire(ProcStructLock);

	/* attach to the ProcGlobal structure */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
	if (!found)
	{
		/* this should not happen. InitProcGlobal() is called before this. */
		elog(STOP, "InitProcess: Proc Header uninitialized");
	}

	if (MyProc != NULL)
	{
		SpinRelease(ProcStructLock);
		elog(ERROR, "ProcInit: you already exist");
	}

	/* try to get a proc struct from the free list first */

	myOffset = ProcGlobal->freeProcs;

	if (myOffset != INVALID_OFFSET)
	{
		MyProc = (PROC *) MAKE_PTR(myOffset);
		ProcGlobal->freeProcs = MyProc->links.next;
	}
	else
	{

		/*
		 * have to allocate one.  We can't use the normal shmem index
		 * table mechanism because the proc structure is stored by PID
		 * instead of by a global name (need to look it up by PID when we
		 * cleanup dead processes).
		 */

		MyProc = (PROC *) ShmemAlloc(sizeof(PROC));
		if (!MyProc)
		{
			SpinRelease(ProcStructLock);
			elog(FATAL, "cannot create new proc: out of memory");
		}

		/* this cannot be initialized until after the buffer pool */
		SHMQueueInit(&(MyProc->holderQueue));
	}

	/*
	 * zero out the spin lock counts and set the sLocks field for
	 * ProcStructLock to 1 as we have acquired this spinlock above but
	 * didn't record it since we didn't have MyProc until now.
	 */
	MemSet(MyProc->sLocks, 0, sizeof(MyProc->sLocks));
	MyProc->sLocks[ProcStructLock] = 1;

	/*
	 * Set up a wait-semaphore for the proc.
	 */
	if (IsUnderPostmaster)
	{
		ProcGetNewSemIdAndNum(&MyProc->sem.semId, &MyProc->sem.semNum);
		/*
		 * we might be reusing a semaphore that belongs to a dead backend.
		 * So be careful and reinitialize its value here.
		 */
		ZeroProcSemaphore(MyProc);
	}
	else
	{
		MyProc->sem.semId = -1;
		MyProc->sem.semNum = -1;
	}

	MyProc->pid = MyProcPid;
	MyProc->databaseId = MyDatabaseId;
	MyProc->xid = InvalidTransactionId;
	MyProc->xmin = InvalidTransactionId;

	/* ----------------------
	 * Release the lock.
	 * ----------------------
	 */
	SpinRelease(ProcStructLock);

	/* -------------------------
	 * Install ourselves in the shmem index table.	The name to
	 * use is determined by the OS-assigned process id.  That
	 * allows the cleanup process to find us after any untimely
	 * exit.
	 * -------------------------
	 */
	location = MAKE_OFFSET(MyProc);
	if ((!ShmemPIDLookup(MyProcPid, &location)) ||
		(location != MAKE_OFFSET(MyProc)))
		elog(STOP, "InitProcess: ShmemPID table broken");

	MyProc->errType = NO_ERROR;
	SHMQueueElemInit(&(MyProc->links));

	on_shmem_exit(ProcKill, 0);
}

/*
 * Initialize the proc's wait-semaphore to count zero.
 */
static void
ZeroProcSemaphore(PROC *proc)
{
	union semun		semun;

	semun.val = 0;
	if (semctl(proc->sem.semId, proc->sem.semNum, SETVAL, semun) < 0)
	{
		fprintf(stderr, "ZeroProcSemaphore: semctl(id=%d,SETVAL) failed: %s\n",
				proc->sem.semId, strerror(errno));
		proc_exit(255);
	}
}

/*
 * Remove a proc from the wait-queue it is on
 * (caller must know it is on one).
 * Locktable lock must be held by caller.
 *
 * NB: this does not remove the process' holder object, nor the lock object,
 * even though their counts might now have gone to zero.  That will happen
 * during a subsequent LockReleaseAll call, which we expect will happen
 * during transaction cleanup.  (Removal of a proc from its wait queue by
 * this routine can only happen if we are aborting the transaction.)
 */
static void
RemoveFromWaitQueue(PROC *proc)
{
	LOCK   *waitLock = proc->waitLock;
	LOCKMODE lockmode = proc->waitLockMode;

	/* Make sure proc is waiting */
	Assert(proc->links.next != INVALID_OFFSET);
	Assert(waitLock);
	Assert(waitLock->waitProcs.size > 0);

	/* Remove proc from lock's wait queue */
	SHMQueueDelete(&(proc->links));
	waitLock->waitProcs.size--;

	/* Undo increments of request counts by waiting process */
	Assert(waitLock->nRequested > 0);
	Assert(waitLock->nRequested > proc->waitLock->nGranted);
	waitLock->nRequested--;
	Assert(waitLock->requested[lockmode] > 0);
	waitLock->requested[lockmode]--;
	/* don't forget to clear waitMask bit if appropriate */
	if (waitLock->granted[lockmode] == waitLock->requested[lockmode])
		waitLock->waitMask &= ~(1 << lockmode);

	/* Clean up the proc's own state */
	SHMQueueElemInit(&(proc->links));
	proc->waitLock = NULL;
	proc->waitHolder = NULL;

	/* See if any other waiters for the lock can be woken up now */
	ProcLockWakeup(LOCK_LOCKMETHOD(*waitLock), waitLock);
}

/*
 * Cancel any pending wait for lock, when aborting a transaction.
 *
 * Returns true if we had been waiting for a lock, else false.
 *
 * (Normally, this would only happen if we accept a cancel/die
 * interrupt while waiting; but an elog(ERROR) while waiting is
 * within the realm of possibility, too.)
 */
bool
LockWaitCancel(void)
{
	/* Nothing to do if we weren't waiting for a lock */
	if (!waitingForLock)
		return false;

	waitingForLock = false;

	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
#ifndef __BEOS__
	{
		struct itimerval timeval,
						 dummy;

		MemSet(&timeval, 0, sizeof(struct itimerval));
		setitimer(ITIMER_REAL, &timeval, &dummy);
	}
#else
	/* BeOS doesn't have setitimer, but has set_alarm */
	set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM);
#endif /* __BEOS__ */

	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
	LockLockTable();
	if (MyProc->links.next != INVALID_OFFSET)
		RemoveFromWaitQueue(MyProc);
	UnlockLockTable();

	/*
	 * Reset the proc wait semaphore to zero.  This is necessary in the
	 * scenario where someone else granted us the lock we wanted before we
	 * were able to remove ourselves from the wait-list.  The semaphore will
	 * have been bumped to 1 by the would-be grantor, and since we are no
	 * longer going to wait on the sema, we have to force it back to zero.
	 * Otherwise, our next attempt to wait for a lock will fall through
	 * prematurely.
	 */
	ZeroProcSemaphore(MyProc);

	/*
	 * Return true even if we were kicked off the lock before we were
	 * able to remove ourselves.
	 */
	return true;
}


/*
 * ProcReleaseLocks() -- release locks associated with current transaction
 *			at transaction commit or abort
 *
 * At commit, we release only locks tagged with the current transaction's XID,
 * leaving those marked with XID 0 (ie, session locks) undisturbed.  At abort,
 * we release all locks including XID 0, because we need to clean up after
 * a failure.  This logic will need extension if we ever support nested
 * transactions.
 *
 * Note that user locks are not released in either case.
 */
void
ProcReleaseLocks(bool isCommit)
{
	if (!MyProc)
		return;
	/* If waiting, get off wait queue (should only be needed after error) */
	LockWaitCancel();
	/* Release locks */
	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc,
				   !isCommit, GetCurrentTransactionId());
}

/*
 * ProcRemove -
 *	  called by the postmaster to clean up the global tables after a
 *	  backend exits.  This also frees up the proc's wait semaphore.
 */
bool
ProcRemove(int pid)
{
	SHMEM_OFFSET location;
	PROC	   *proc;

	location = ShmemPIDDestroy(pid);
	if (location == INVALID_OFFSET)
		return FALSE;
	proc = (PROC *) MAKE_PTR(location);

	SpinAcquire(ProcStructLock);

	ProcFreeSem(proc->sem.semId, proc->sem.semNum);

	proc->links.next = ProcGlobal->freeProcs;
	ProcGlobal->freeProcs = MAKE_OFFSET(proc);

	SpinRelease(ProcStructLock);

	return TRUE;
}

/*
 * ProcKill() -- Destroy the per-proc data structure for
 *		this process. Release any of its held spin locks.
 *
 * This is done inside the backend process before it exits.
 * ProcRemove, above, will be done by the postmaster afterwards.
 */
static void
ProcKill(void)
{
	Assert(MyProc);

	/* Release any spinlocks I am holding */
	ProcReleaseSpins(MyProc);

	/* Get off any wait queue I might be on */
	LockWaitCancel();

	/* Remove from the standard lock table */
	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, true, InvalidTransactionId);

#ifdef USER_LOCKS
	/* Remove from the user lock table */
	LockReleaseAll(USER_LOCKMETHOD, MyProc, true, InvalidTransactionId);
#endif

	MyProc = NULL;
}

/*
 * ProcQueue package: routines for putting processes to sleep
 *		and  waking them up
 */

/*
 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
 *
 * Returns: a pointer to the queue or NULL
 * Side Effects: Initializes the queue if we allocated one
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(char *name)
{
	bool		found;
	PROC_QUEUE *queue = (PROC_QUEUE *)
		ShmemInitStruct(name, sizeof(PROC_QUEUE), &found);

	if (!queue)
		return NULL;
	if (!found)
		ProcQueueInit(queue);
	return queue;
}

#endif

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
ProcQueueInit(PROC_QUEUE *queue)
{
	SHMQueueInit(&(queue->links));
	queue->size = 0;
}


/*
 * ProcSleep -- put a process to sleep
 *
 * P() on the semaphore should put us to sleep.  The process
 * semaphore is normally zero, so when we try to acquire it, we sleep.
 *
 * Locktable's spinlock must be held at entry, and will be held
 * at exit.
 *
 * Result is NO_ERROR if we acquired the lock, STATUS_ERROR if not (deadlock).
 *
 * ASSUME: that no one will fiddle with the queue until after
 *		we release the spin lock.
 *
 * NOTES: The process queue is now a priority queue for locking.
 */
int
ProcSleep(LOCKMETHODCTL *lockctl,
		  LOCKMODE lockmode,
		  LOCK *lock,
		  HOLDER *holder)
{
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	SPINLOCK	spinlock = lockctl->masterLock;
	int			myMask = (1 << lockmode);
	int			waitMask = lock->waitMask;
	PROC	   *proc;
	int			i;
	int			aheadGranted[MAX_LOCKMODES];
	bool		selfConflict = (lockctl->conflictTab[lockmode] & myMask),
				prevSame = false;
#ifndef __BEOS__
	struct itimerval timeval,
				dummy;
#else
	bigtime_t	time_interval;
#endif

	MyProc->waitLock = lock;
	MyProc->waitHolder = holder;
	MyProc->waitLockMode = lockmode;
	/* We assume the caller set up MyProc->heldLocks */

	proc = (PROC *) MAKE_PTR(waitQueue->links.prev);

	/* if we don't conflict with any waiter - be first in queue */
	if (!(lockctl->conflictTab[lockmode] & waitMask))
		goto ins;

	/* otherwise, determine where we should go into the queue */
	for (i = 1; i < MAX_LOCKMODES; i++)
		aheadGranted[i] = lock->granted[i];
	(aheadGranted[lockmode])++;

	for (i = 0; i < waitQueue->size; i++)
	{
		LOCKMODE	procWaitMode = proc->waitLockMode;

		/* must I wait for him ? */
		if (lockctl->conflictTab[lockmode] & proc->heldLocks)
		{
			/* is he waiting for me ? */
			if (lockctl->conflictTab[procWaitMode] & MyProc->heldLocks)
			{
				/* Yes, report deadlock failure */
				MyProc->errType = STATUS_ERROR;
				goto rt;
			}
			/* I must go after him in queue - so continue loop */
		}
		/* if he waits for me, go before him in queue */
		else if (lockctl->conflictTab[procWaitMode] & MyProc->heldLocks)
			break;
		/* if conflicting locks requested */
		else if (lockctl->conflictTab[procWaitMode] & myMask)
		{

			/*
			 * If I request non self-conflicting lock and there are others
			 * requesting the same lock just before this guy - stop here.
			 */
			if (!selfConflict && prevSame)
				break;
		}

		/*
		 * Last attempt to not move any further to the back of the queue:
		 * if we don't conflict with remaining waiters, stop here.
		 */
		else if (!(lockctl->conflictTab[lockmode] & waitMask))
			break;

		/* Move past this guy, and update state accordingly */
		prevSame = (procWaitMode == lockmode);
		(aheadGranted[procWaitMode])++;
		if (aheadGranted[procWaitMode] == lock->requested[procWaitMode])
			waitMask &= ~(1 << procWaitMode);
		proc = (PROC *) MAKE_PTR(proc->links.prev);
	}

ins:;
	/* -------------------
	 * Insert self into queue, ahead of the given proc.
	 * These operations are atomic (because of the spinlock).
	 * -------------------
	 */
	SHMQueueInsertTL(&(proc->links), &(MyProc->links));
	waitQueue->size++;

	lock->waitMask |= myMask;

	MyProc->errType = NO_ERROR;		/* initialize result for success */

	/* mark that we are waiting for a lock */
	waitingForLock = true;

	/* -------------------
	 * Release the locktable's spin lock.
	 *
	 * NOTE: this may also cause us to exit critical-section state,
	 * possibly allowing a cancel/die interrupt to be accepted.
	 * This is OK because we have recorded the fact that we are waiting for
	 * a lock, and so LockWaitCancel will clean up if cancel/die happens.
	 * -------------------
	 */
	SpinRelease(spinlock);

	/* --------------
	 * Set timer so we can wake up after a while and check for a deadlock.
	 * If a deadlock is detected, the handler releases the process's
	 * semaphore and sets MyProc->errType = STATUS_ERROR, allowing us to
	 * know that we must report failure rather than success.
	 *
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
	 *
	 * Need to zero out the struct so that the interval and microseconds
	 * fields are 0.
	 * --------------
	 */
#ifndef __BEOS__
	MemSet(&timeval, 0, sizeof(struct itimerval));
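	/* DeadlockTimeout is in milliseconds; e.g. 1500 ms yields 1 sec + 500000 usec */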
	timeval.it_value.tv_sec = DeadlockTimeout / 1000;
	timeval.it_value.tv_usec = (DeadlockTimeout % 1000) * 1000;
	if (setitimer(ITIMER_REAL, &timeval, &dummy))
		elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
#else
	time_interval = DeadlockTimeout * 1000000; /* usecs */
	if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0)
		elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
#endif

	/* --------------
	 * If someone wakes us between SpinRelease and IpcSemaphoreLock,
	 * IpcSemaphoreLock will not block.  The wakeup is "saved" by
	 * the semaphore implementation.  Note also that if HandleDeadLock
	 * is invoked but does not detect a deadlock, IpcSemaphoreLock()
	 * will continue to wait.  There used to be a loop here, but it
	 * was useless code...
	 *
	 * We pass interruptOK = true, which eliminates a window in which
	 * cancel/die interrupts would be held off undesirably.  This is a
	 * promise that we don't mind losing control to a cancel/die interrupt
	 * here.  We don't, because we have no state-change work to do after
	 * being granted the lock (the grantor did it all).
	 * --------------
	 */
	IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true);

	/* ---------------
	 * Disable the timer, if it's still running
	 * ---------------
	 */
#ifndef __BEOS__
	MemSet(&timeval, 0, sizeof(struct itimerval));
	if (setitimer(ITIMER_REAL, &timeval, &dummy))
		elog(FATAL, "ProcSleep: Unable to disable timer for process wakeup");
#else
	if (set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM) < 0)
		elog(FATAL, "ProcSleep: Unable to disable timer for process wakeup");
#endif

	/*
	 * Now there is nothing for LockWaitCancel to do.
	 */
	waitingForLock = false;

	/* ----------------
	 * Re-acquire the locktable's spin lock.
	 *
	 * We could accept a cancel/die interrupt here.  That's OK because
	 * the lock is now registered as being held by this process.
	 * ----------------
	 */
	SpinAcquire(spinlock);

rt:;

	MyProc->waitLock = NULL;
	MyProc->waitHolder = NULL;

	return MyProc->errType;
}


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
 *	 Also remove the process from the wait queue and set its links invalid.
 *	 RETURN: the next process in the wait queue.
 */
PROC *
ProcWakeup(PROC *proc, int errType)
{
	PROC	   *retProc;

	/* assume that spinlock has been acquired */

	if (proc->links.prev == INVALID_OFFSET ||
		proc->links.next == INVALID_OFFSET)
		return (PROC *) NULL;

	retProc = (PROC *) MAKE_PTR(proc->links.prev);

	SHMQueueDelete(&(proc->links));
	SHMQueueElemInit(&(proc->links));
	(proc->waitLock->waitProcs.size)--;

	proc->errType = errType;

	IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);

	return retProc;
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
 *		released.
 */
int
ProcLockWakeup(LOCKMETHOD lockmethod, LOCK *lock)
{
	PROC_QUEUE *queue = &(lock->waitProcs);
	PROC	   *proc;
	int			awoken = 0;
	LOCKMODE	last_lockmode = 0;
	int			queue_size = queue->size;

	Assert(queue_size >= 0);

	if (!queue_size)
		return STATUS_NOT_FOUND;

	proc = (PROC *) MAKE_PTR(queue->links.prev);

	while (queue_size-- > 0)
	{
		if (proc->waitLockMode == last_lockmode)
		{
			/*
			 * This proc will conflict as the previous one did, don't even
			 * try.
			 */
			goto nextProc;
		}

		/*
		 * Does this proc conflict with locks held by others ?
		 */
		if (LockResolveConflicts(lockmethod,
								 proc->waitLockMode,
								 lock,
								 proc->waitHolder,
								 proc,
								 NULL) != STATUS_OK)
		{
			/* Yes.  Quit if we already awoke at least one process. */
			if (awoken != 0)
				break;
			/* Otherwise, see if any later waiters can be awoken. */
			last_lockmode = proc->waitLockMode;
			goto nextProc;
		}

		/*
		 * OK to wake up this sleeping process.
		 */
		GrantLock(lock, proc->waitHolder, proc->waitLockMode);
		proc = ProcWakeup(proc, NO_ERROR);
		awoken++;

		/*
		 * ProcWakeup removes proc from the lock's waiting process queue
		 * and returns the next proc in chain; don't use prev link.
		 */
		continue;

nextProc:
		proc = (PROC *) MAKE_PTR(proc->links.prev);
	}

	Assert(queue->size >= 0);

	if (awoken)
		return STATUS_OK;
	else
	{
		/* Something is still blocking us.	May have deadlocked. */
#ifdef LOCK_DEBUG
		if (lock->tag.lockmethod == USER_LOCKMETHOD ? Trace_userlocks : Trace_locks)
		{
			elog(DEBUG, "ProcLockWakeup: lock(%lx) can't wake up any process",
				 MAKE_OFFSET(lock));
			if (Debug_deadlocks)
				DumpAllLocks();
		}
#endif
		return STATUS_NOT_FOUND;
	}
}

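/*
 * ProcAddLock -- append a holder queue element to MyProc's list of held
 *		locks, so LockReleaseAll can find it at transaction end or backend
 *		exit.
 */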
void
ProcAddLock(SHM_QUEUE *elem)
{
	SHMQueueInsertTL(&MyProc->holderQueue, elem);
}

/* --------------------
 * We only get to this routine if we got SIGALRM after DeadlockTimeout
 * while waiting for a lock to be released by some other process.  Look
 * to see if there's a deadlock; if not, just return and continue waiting.
 * If we have a real deadlock, remove ourselves from the lock's wait queue
 * and signal an error to ProcSleep.
 * --------------------
 */
void
HandleDeadLock(SIGNAL_ARGS)
{
	int			save_errno = errno;

	/*
	 * Acquire locktable lock.  Note that the SIGALRM interrupt had better
	 * not be enabled anywhere that this process itself holds the locktable
	 * lock, else this will wait forever.  Also note that this calls
	 * SpinAcquire which creates a critical section, so that this routine
	 * cannot be interrupted by cancel/die interrupts.
	 */
	LockLockTable();

	/* ---------------------
	 * Check to see if we've been awoken by anyone in the interim.
	 *
	 * If we have we can return and resume our transaction -- happy day.
	 * Before we are awoken the process releasing the lock grants it to
	 * us so we know that we don't have to wait anymore.
	 *
	 * We check by looking to see if we've been unlinked from the wait queue.
	 * This is quicker than checking our semaphore's state, since no kernel
	 * call is needed, and it is safe because we hold the locktable lock.
	 * ---------------------
	 */
	if (MyProc->links.prev == INVALID_OFFSET ||
		MyProc->links.next == INVALID_OFFSET)
	{
		UnlockLockTable();
		errno = save_errno;
		return;
	}

#ifdef LOCK_DEBUG
	if (Debug_deadlocks)
		DumpAllLocks();
#endif

	if (!DeadLockCheck(MyProc, MyProc->waitLock))
	{
		/* No deadlock, so keep waiting */
		UnlockLockTable();
		errno = save_errno;
		return;
	}

	/* ------------------------
	 * Oops.  We have a deadlock.
	 *
	 * Get this process out of wait state.
	 * ------------------------
	 */
	RemoveFromWaitQueue(MyProc);

	/* -------------
	 * Set MyProc->errType to STATUS_ERROR so that ProcSleep will
	 * report an error after we return from this signal handler.
	 * -------------
	 */
	MyProc->errType = STATUS_ERROR;

	/* ------------------
	 * Unlock my semaphore so that the interrupted ProcSleep() call can finish.
	 * ------------------
	 */
	IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum);

	/* ------------------
	 * We're done here.  Transaction abort caused by the error that ProcSleep
	 * will raise will cause any other locks we hold to be released, thus
	 * allowing other processes to wake up; we don't need to do that here.
	 * NOTE: an exception is that releasing locks we hold doesn't consider
	 * the possibility of waiters that were blocked behind us on the lock
	 * we just failed to get, and might now be wakable because we're not
	 * in front of them anymore.  However, RemoveFromWaitQueue took care of
	 * waking up any such processes.
	 * ------------------
	 */
	UnlockLockTable();
	errno = save_errno;
}

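/*
 * ProcReleaseSpins -- release any spinlocks recorded as held in
 *		proc->sLocks (MyProc by default), then abort any buffer I/O
 *		that was in progress.
 */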
void
ProcReleaseSpins(PROC *proc)
{
	int			i;

	if (!proc)
		proc = MyProc;

	if (!proc)
		return;
	for (i = 0; i < (int) MAX_SPINS; i++)
	{
		if (proc->sLocks[i])
		{
			Assert(proc->sLocks[i] == 1);
			SpinRelease(i);
		}
	}
	AbortBufferIO();
}

/*****************************************************************************
 *
 *****************************************************************************/

/*
 * ProcGetNewSemIdAndNum -
 *	  scan the free semaphore bitmap and allocate a single semaphore from
 *	  a semaphore set.
 */
static void
ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum)
{
	int			i;
	IpcSemaphoreId *procSemIds = ProcGlobal->procSemIds;
	int32	   *freeSemMap = ProcGlobal->freeSemMap;
	int32		fullmask = (1 << PROC_NSEMS_PER_SET) - 1;

	/*
	 * we hold ProcStructLock when entering this routine. We scan through
	 * the bitmap to look for a free semaphore.
	 */

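	/*
	 * Each freeSemMap entry covers one semaphore set, one bit per
	 * semaphore, with a set bit meaning "allocated"; e.g. with
	 * PROC_NSEMS_PER_SET = 16, fullmask is 0xFFFF and marks a fully
	 * used set.
	 */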
	for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
	{
		int			mask = 1;
		int			j;

		if (freeSemMap[i] == fullmask)
			continue;			/* this set is fully allocated */
		if (procSemIds[i] < 0)
			continue;			/* this set hasn't been initialized */

		for (j = 0; j < PROC_NSEMS_PER_SET; j++)
		{
			if ((freeSemMap[i] & mask) == 0)
			{

				/*
				 * a free semaphore found. Mark it as allocated.
				 */
				freeSemMap[i] |= mask;

				*semId = procSemIds[i];
				*semNum = j;
				return;
			}
			mask <<= 1;
		}
	}

	/* if we reach here, all the semaphores are in use. */
	elog(ERROR, "ProcGetNewSemIdAndNum: cannot allocate a free semaphore");
}

/*
 * ProcFreeSem -
 *	  free up our semaphore in the semaphore set.
 */
static void
ProcFreeSem(IpcSemaphoreId semId, int semNum)
{
	int32		mask;
	int			i;

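	/* Build a mask with only our bit clear; e.g. semNum 3 gives ~0x08 */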
	mask = ~(1 << semNum);

	for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
	{
		if (ProcGlobal->procSemIds[i] == semId)
		{
			ProcGlobal->freeSemMap[i] &= mask;
			return;
		}
	}
	fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId);
}

/*
 * ProcFreeAllSemaphores -
 *	  called at shmem_exit time, ie when exiting the postmaster or
 *	  destroying shared state for a failed set of backends.
 *	  Free up all the semaphores allocated to the lmgrs of the backends.
 */
static void
ProcFreeAllSemaphores(void)
{
	int			i;

	for (i = 0; i < PROC_SEM_MAP_ENTRIES; i++)
	{
		if (ProcGlobal->procSemIds[i] >= 0)
			IpcSemaphoreKill(ProcGlobal->procSemIds[i]);
	}
}