proc.c 34.0 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.163 2005/08/20 23:26:24 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * Interface (a):
 *		ProcSleep(), ProcWakeup(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocating memory
 *
 * Locking and waiting for buffers can cause the backend to be
 * put to sleep.  Whoever releases the lock, etc. wakes the
 * process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with current transaction
 *
 * ProcKill -- destroys the shared memory state (and locks)
 *		associated with the process.
 *
 * 5/15/91 -- removed the buffer pool based lock chain in favor
 *		of a shared memory lock chain.  The write-protection is
 *		more expensive if the lock chain is in the buffer pool.
 *		The only reason I kept the lock chain in the buffer pool
 *		in the first place was to allow the lock table to grow larger
 *		than available shared memory and that isn't going to work
 *		without a lot of unimplemented support anyway.
 */
41 42
#include "postgres.h"

43
#include <signal.h>
44 45
#include <unistd.h>
#include <sys/time.h>
M
Marc G. Fournier 已提交
46

47
#include "miscadmin.h"
48
#include "access/xact.h"
49
#include "storage/bufmgr.h"
50
#include "storage/ipc.h"
51
#include "storage/proc.h"
52
#include "storage/procarray.h"
53
#include "storage/spin.h"
54

55

56
/* GUC variables */
B
Bruce Momjian 已提交
57
int			DeadlockTimeout = 1000;
58
int			StatementTimeout = 0;
M
 
Marc G. Fournier 已提交
59

60
/* Pointer to this process's PGPROC struct, if any */
J
Jan Wieck 已提交
61
PGPROC	   *MyProc = NULL;
62 63

/*
J
Jan Wieck 已提交
64
 * This spinlock protects the freelist of recycled PGPROC structures.
65
 * We cannot use an LWLock because the LWLock manager depends on already
J
Jan Wieck 已提交
66
 * having a PGPROC and a wait semaphore!  But these structures are touched
67 68
 * relatively infrequently (only at backend startup or shutdown) and not for
 * very long, so a spinlock is okay.
69
 */
70
NON_EXEC_STATIC slock_t *ProcStructLock = NULL;
71

72
/* Pointers to shared-memory structures */
73
static PROC_HDR *ProcGlobal = NULL;
74
static PGPROC *DummyProcs = NULL;
75

76 77
static bool waitingForLock = false;

78 79 80
/* Mark these volatile because they can be changed by signal handler */
static volatile bool statement_timeout_active = false;
static volatile bool deadlock_timeout_active = false;
B
Bruce Momjian 已提交
81

82 83 84 85
/* statement_fin_time is valid only if statement_timeout_active is true */
static struct timeval statement_fin_time;


86 87
static void ProcKill(int code, Datum arg);
static void DummyProcKill(int code, Datum arg);
88
static bool CheckStatementTimeout(void);
89

V
Vadim B. Mikheev 已提交
90

91 92 93
/*
 * Report shared-memory space needed by InitProcGlobal.
 */
94
Size
95
ProcGlobalShmemSize(void)
96
{
97 98 99 100 101 102 103 104 105 106
	Size		size = 0;

	/* ProcGlobal */
	size = add_size(size, sizeof(PROC_HDR));
	/* DummyProcs */
	size = add_size(size, mul_size(NUM_DUMMY_PROCS, sizeof(PGPROC)));
	/* MyProcs */
	size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC)));
	/* ProcStructLock */
	size = add_size(size, sizeof(slock_t));
107 108 109 110

	return size;
}

111 112 113 114
/*
 * Report number of semaphores needed by InitProcGlobal.
 */
int
115
ProcGlobalSemas(void)
116
{
117
	/* We need a sema per backend, plus one for each dummy process. */
118
	return MaxBackends + NUM_DUMMY_PROCS;
119 120
}

121 122
/*
 * InitProcGlobal -
123
 *	  Initialize the global process table during postmaster startup.
124
 *
125
 *	  We also create all the per-process semaphores we will need to support
126 127 128 129 130 131 132 133 134
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxBackends higher than his kernel will support, he'll find out sooner
 *	  rather than later.
135 136 137 138
 *
 *	  Another reason for creating semaphores here is that the semaphore
 *	  implementation typically requires us to create semaphores in the
 *	  postmaster, not in backends.
139 140
 */
void
141
InitProcGlobal(void)
142
{
B
Bruce Momjian 已提交
143 144
	bool		foundProcGlobal,
				foundDummy;
145

146
	/* Create or attach to the ProcGlobal shared structure */
147
	ProcGlobal = (PROC_HDR *)
148
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &foundProcGlobal);
149

150
	/*
151
	 * Create or attach to the PGPROC structures for dummy (bgwriter)
152
	 * processes, too.	These do not get linked into the freeProcs list.
153
	 */
154
	DummyProcs = (PGPROC *)
155
		ShmemInitStruct("DummyProcs", NUM_DUMMY_PROCS * sizeof(PGPROC),
156
						&foundDummy);
157 158 159 160 161 162 163

	if (foundProcGlobal || foundDummy)
	{
		/* both should be present or neither */
		Assert(foundProcGlobal && foundDummy);
	}
	else
164
	{
165 166 167
		/*
		 * We're the first - initialize.
		 */
168
		PGPROC	   *procs;
169
		int			i;
170

171
		ProcGlobal->freeProcs = INVALID_OFFSET;
172

B
Bruce Momjian 已提交
173
		/*
B
Bruce Momjian 已提交
174 175
		 * Pre-create the PGPROC structures and create a semaphore for
		 * each.
176
		 */
177
		procs = (PGPROC *) ShmemAlloc(MaxBackends * sizeof(PGPROC));
178 179 180 181
		if (!procs)
			ereport(FATAL,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of shared memory")));
182 183
		MemSet(procs, 0, MaxBackends * sizeof(PGPROC));
		for (i = 0; i < MaxBackends; i++)
184
		{
185 186 187
			PGSemaphoreCreate(&(procs[i].sem));
			procs[i].links.next = ProcGlobal->freeProcs;
			ProcGlobal->freeProcs = MAKE_OFFSET(&procs[i]);
188
		}
189

190
		MemSet(DummyProcs, 0, NUM_DUMMY_PROCS * sizeof(PGPROC));
J
Jan Wieck 已提交
191 192
		for (i = 0; i < NUM_DUMMY_PROCS; i++)
		{
193 194
			DummyProcs[i].pid = 0;		/* marks dummy proc as not in use */
			PGSemaphoreCreate(&(DummyProcs[i].sem));
J
Jan Wieck 已提交
195
		}
196 197 198 199

		/* Create ProcStructLock spinlock, too */
		ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
		SpinLockInit(ProcStructLock);
200 201 202
	}
}

203
/*
204
 * InitProcess -- initialize a per-process data structure for this backend
205 206
 */
void
207
InitProcess(void)
208
{
209
	SHMEM_OFFSET myOffset;
B
Bruce Momjian 已提交
210

211 212
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
213 214

	/*
215 216
	 * ProcGlobal should be set by a previous call to InitProcGlobal (if
	 * we are a backend, we inherit this by fork() from the postmaster).
217
	 */
218
	if (procglobal == NULL)
219
		elog(PANIC, "proc header uninitialized");
220 221

	if (MyProc != NULL)
222
		elog(ERROR, "you already exist");
223

224
	/*
B
Bruce Momjian 已提交
225 226
	 * Try to get a proc struct from the free list.  If this fails, we
	 * must be out of PGPROC structures (not to mention semaphores).
227
	 */
228
	SpinLockAcquire(ProcStructLock);
229

230
	myOffset = procglobal->freeProcs;
231 232

	if (myOffset != INVALID_OFFSET)
233
	{
J
Jan Wieck 已提交
234
		MyProc = (PGPROC *) MAKE_PTR(myOffset);
235
		procglobal->freeProcs = MyProc->links.next;
236
		SpinLockRelease(ProcStructLock);
237 238 239 240
	}
	else
	{
		/*
J
Jan Wieck 已提交
241
		 * If we reach here, all the PGPROCs are in use.  This is one of
242 243
		 * the possible places to detect "too many backends", so give the
		 * standard error message.
244
		 */
245
		SpinLockRelease(ProcStructLock);
246 247 248
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("sorry, too many clients already")));
249
	}
250

251
	/*
B
Bruce Momjian 已提交
252 253
	 * Initialize all fields of MyProc, except for the semaphore which was
	 * prepared for us by InitProcGlobal.
254
	 */
255
	SHMQueueElemInit(&(MyProc->links));
256
	MyProc->waitStatus = STATUS_OK;
257
	MyProc->xid = InvalidTransactionId;
258
	MyProc->xmin = InvalidTransactionId;
259 260
	MyProc->pid = MyProcPid;
	MyProc->databaseId = MyDatabaseId;
261 262
	/* Will be set properly after the session role id is determined */
	MyProc->roleId = InvalidOid;
263 264 265
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
266
	MyProc->waitLock = NULL;
267 268
	MyProc->waitProcLock = NULL;
	SHMQueueInit(&(MyProc->procLocks));
269

270 271 272
	/*
	 * Add our PGPROC to the PGPROC array in shared memory.
	 */
273
	ProcArrayAdd(MyProc);
274

275
	/*
276
	 * Arrange to clean up at backend exit.
277
	 */
278
	on_shmem_exit(ProcKill, 0);
279

280
	/*
281
	 * We might be reusing a semaphore that belonged to a failed process.
282 283
	 * So be careful and reinitialize its value here.
	 */
284
	PGSemaphoreReset(&MyProc->sem);
285

286
	/*
J
Jan Wieck 已提交
287
	 * Now that we have a PGPROC, we could try to acquire locks, so
B
Bruce Momjian 已提交
288
	 * initialize the deadlock checker.
289 290
	 */
	InitDeadLockChecking();
291 292
}

293 294 295
/*
 * InitDummyProcess -- create a dummy per-process data structure
 *
296 297 298 299
 * This is called by bgwriter and similar processes so that they will have a
 * MyProc value that's real enough to let them wait for LWLocks.  The PGPROC
 * and sema that are assigned are the extra ones created during
 * InitProcGlobal.
300 301 302
 *
 * Dummy processes are presently not expected to wait for real (lockmgr)
 * locks, nor to participate in sinval messaging.
303 304
 */
void
J
Jan Wieck 已提交
305
InitDummyProcess(int proctype)
306
{
B
Bruce Momjian 已提交
307
	PGPROC	   *dummyproc;
J
Jan Wieck 已提交
308

309
	/*
310 311
	 * ProcGlobal should be set by a previous call to InitProcGlobal (we
	 * inherit this by fork() from the postmaster).
312
	 */
313
	if (ProcGlobal == NULL || DummyProcs == NULL)
314
		elog(PANIC, "proc header uninitialized");
315 316

	if (MyProc != NULL)
317
		elog(ERROR, "you already exist");
318

319 320 321
	Assert(proctype >= 0 && proctype < NUM_DUMMY_PROCS);

	dummyproc = &DummyProcs[proctype];
J
Jan Wieck 已提交
322

323
	/*
J
Jan Wieck 已提交
324
	 * dummyproc should not presently be in use by anyone else
325
	 */
J
Jan Wieck 已提交
326 327
	if (dummyproc->pid != 0)
		elog(FATAL, "DummyProc[%d] is in use by PID %d",
328
			 proctype, dummyproc->pid);
J
Jan Wieck 已提交
329
	MyProc = dummyproc;
330 331

	/*
332 333
	 * Initialize all fields of MyProc, except MyProc->sem which was set
	 * up by InitProcGlobal.
334
	 */
335
	MyProc->pid = MyProcPid;	/* marks dummy proc as in use by me */
336
	SHMQueueElemInit(&(MyProc->links));
337
	MyProc->waitStatus = STATUS_OK;
338 339 340
	MyProc->xid = InvalidTransactionId;
	MyProc->xmin = InvalidTransactionId;
	MyProc->databaseId = MyDatabaseId;
341
	MyProc->roleId = InvalidOid;
342 343 344 345
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
	MyProc->waitLock = NULL;
346 347
	MyProc->waitProcLock = NULL;
	SHMQueueInit(&(MyProc->procLocks));
348 349 350 351

	/*
	 * Arrange to clean up at process exit.
	 */
352
	on_shmem_exit(DummyProcKill, Int32GetDatum(proctype));
353 354 355 356 357

	/*
	 * We might be reusing a semaphore that belonged to a failed process.
	 * So be careful and reinitialize its value here.
	 */
358
	PGSemaphoreReset(&MyProc->sem);
359 360
}

361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
/*
 * HaveNFreeProcs
 *		Report whether the PGPROC freelist holds at least n entries.
 *
 * The freelist is walked under ProcStructLock, stopping as soon as n
 * entries have been seen.  Returns true for n <= 0.
 *
 * Note: this is designed on the assumption that n will generally be small.
 */
bool
HaveNFreeProcs(int n)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
	SHMEM_OFFSET cursor;
	int			stillNeeded = n;

	SpinLockAcquire(ProcStructLock);

	for (cursor = procglobal->freeProcs;
		 stillNeeded > 0 && cursor != INVALID_OFFSET;
		 stillNeeded--)
	{
		PGPROC	   *freeProc = (PGPROC *) MAKE_PTR(cursor);

		cursor = freeProc->links.next;
	}

	SpinLockRelease(ProcStructLock);

	return (stillNeeded <= 0);
}

390 391 392
/*
 * Cancel any pending wait for lock, when aborting a transaction.
 *
393 394
 * Returns true if we had been waiting for a lock, else false.
 *
395
 * (Normally, this would only happen if we accept a cancel/die
396
 * interrupt while waiting; but an ereport(ERROR) while waiting is
397 398
 * within the realm of possibility, too.)
 */
399
bool
400 401 402 403
LockWaitCancel(void)
{
	/* Nothing to do if we weren't waiting for a lock */
	if (!waitingForLock)
404 405
		return false;

406
	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
407
	disable_sig_alarm(false);
408 409

	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
410
	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
411

412
	if (MyProc->links.next != INVALID_OFFSET)
413 414 415
	{
		/* We could not have been granted the lock yet */
		Assert(MyProc->waitStatus == STATUS_ERROR);
416
		RemoveFromWaitQueue(MyProc);
417 418 419 420 421
	}
	else
	{
		/*
		 * Somebody kicked us off the lock queue already.  Perhaps they
B
Bruce Momjian 已提交
422 423 424
		 * granted us the lock, or perhaps they detected a deadlock. If
		 * they did grant us the lock, we'd better remember it in our
		 * local lock table.
425
		 */
426 427
		if (MyProc->waitStatus == STATUS_OK)
			GrantAwaitedLock();
428 429 430 431
	}

	waitingForLock = false;

432
	LWLockRelease(LockMgrLock);
H
Hiroshi Inoue 已提交
433

434 435 436
	/*
	 * Reset the proc wait semaphore to zero.  This is necessary in the
	 * scenario where someone else granted us the lock we wanted before we
B
Bruce Momjian 已提交
437 438 439 440 441
	 * were able to remove ourselves from the wait-list.  The semaphore
	 * will have been bumped to 1 by the would-be grantor, and since we
	 * are no longer going to wait on the sema, we have to force it back
	 * to zero. Otherwise, our next attempt to wait for a lock will fall
	 * through prematurely.
442
	 */
443
	PGSemaphoreReset(&MyProc->sem);
444 445

	/*
B
Bruce Momjian 已提交
446 447
	 * Return true even if we were kicked off the lock before we were able
	 * to remove ourselves.
448 449
	 */
	return true;
H
Hiroshi Inoue 已提交
450
}
451

452

453
/*
454
 * ProcReleaseLocks() -- release locks associated with current transaction
455
 *			at main transaction commit or abort
456 457 458 459 460 461
 *
 * At main transaction commit, we release all locks except session locks.
 * At main transaction abort, we release all locks including session locks;
 * this lets us clean up after a VACUUM FULL failure.
 *
 * At subtransaction commit, we don't release any locks (so this func is not
462
 * needed at all); we will defer the releasing to the parent transaction.
463
 * At subtransaction abort, we release all locks held by the subtransaction;
464 465
 * this is implemented by retail releasing of the locks under control of
 * the ResourceOwner mechanism.
466 467
 *
 * Note that user locks are not released in any case.
468 469
 */
void
470
ProcReleaseLocks(bool isCommit)
471
{
472 473
	if (!MyProc)
		return;
474 475 476
	/* If waiting, get off wait queue (should only be needed after error) */
	LockWaitCancel();
	/* Release locks */
477
	LockReleaseAll(DEFAULT_LOCKMETHOD, !isCommit);
478 479 480 481 482
}


/*
 * ProcKill() -- Destroy the per-proc data structure for
483
 *		this process. Release any of its held LW locks.
484 485
 */
static void
486
ProcKill(int code, Datum arg)
487
{
488 489 490
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

491
	Assert(MyProc != NULL);
492

493
	/*
494 495 496
	 * Release any LW locks I am holding.  There really shouldn't be any,
	 * but it's cheap to check again before we cut the knees off the LWLock
	 * facility by releasing our PGPROC ...
497
	 */
498
	LWLockReleaseAll();
499

500
	/* Remove our PGPROC from the PGPROC array in shared memory */
501
	ProcArrayRemove(MyProc);
502

503
	SpinLockAcquire(ProcStructLock);
504

J
Jan Wieck 已提交
505
	/* Return PGPROC structure (and semaphore) to freelist */
506 507
	MyProc->links.next = procglobal->freeProcs;
	procglobal->freeProcs = MAKE_OFFSET(MyProc);
508

J
Jan Wieck 已提交
509
	/* PGPROC struct isn't mine anymore */
510
	MyProc = NULL;
511

512 513 514 515
	SpinLockRelease(ProcStructLock);
}

/*
516
 * DummyProcKill() -- Cut-down version of ProcKill for dummy (bgwriter)
J
Jan Wieck 已提交
517
 *		processes.	The PGPROC and sema are not released, only marked
518 519 520
 *		as not-in-use.
 */
static void
521
DummyProcKill(int code, Datum arg)
522
{
B
Bruce Momjian 已提交
523 524
	int			proctype = DatumGetInt32(arg);
	PGPROC	   *dummyproc;
J
Jan Wieck 已提交
525

526
	Assert(proctype >= 0 && proctype < NUM_DUMMY_PROCS);
J
Jan Wieck 已提交
527

528
	dummyproc = &DummyProcs[proctype];
J
Jan Wieck 已提交
529

530
	Assert(MyProc == dummyproc);
531

532
	/* Release any LW locks I am holding (see notes above) */
533 534
	LWLockReleaseAll();

535
	/* Mark dummy proc no longer in use */
536 537
	MyProc->pid = 0;

J
Jan Wieck 已提交
538
	/* PGPROC struct isn't mine anymore */
539
	MyProc = NULL;
540 541
}

542

543 544
/*
 * ProcQueue package: routines for putting processes to sleep
 *		and waking them up
 */

/*
 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
 *
 * Returns: a pointer to the queue, or NULL if ShmemInitStruct returned NULL
 * Side Effects: Initializes the queue if we allocated one
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(char *name)
{
	bool		alreadyExists;
	PROC_QUEUE *result;

	result = (PROC_QUEUE *)
		ShmemInitStruct(name, sizeof(PROC_QUEUE), &alreadyExists);

	/* Only a freshly created queue needs initializing */
	if (result != NULL && !alreadyExists)
		ProcQueueInit(result);

	return result;
}
#endif
569 570 571 572 573

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
574
ProcQueueInit(PROC_QUEUE *queue)
575
{
576 577
	SHMQueueInit(&(queue->links));
	queue->size = 0;
578 579 580 581 582 583
}


/*
 * ProcSleep -- put a process to sleep
 *
584 585
 * Caller must have set MyProc->heldLocks to reflect locks already held
 * on the lockable object by this process (under all XIDs).
586
 *
587
 * Locktable's masterLock must be held at entry, and will be held
588
 * at exit.
589
 *
590
 * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
591
 *
592
 * ASSUME: that no one will fiddle with the queue until after
593
 *		we release the masterLock.
594 595
 *
 * NOTES: The process queue is now a priority queue for locking.
596 597 598
 *
 * P() on the semaphore should put us to sleep.  The process
 * semaphore is normally zero, so when we try to acquire it, we sleep.
599 600
 */
int
601
ProcSleep(LockMethod lockMethodTable,
602 603
		  LOCKMODE lockmode,
		  LOCK *lock,
604
		  PROCLOCK *proclock)
605
{
B
Bruce Momjian 已提交
606
	LWLockId	masterLock = lockMethodTable->masterLock;
607
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
608
	LOCKMASK	myHeldLocks = MyProc->heldLocks;
609
	bool		early_deadlock = false;
J
Jan Wieck 已提交
610
	PGPROC	   *proc;
611
	int			i;
612

613
	/*
614 615 616 617 618 619
	 * Determine where to add myself in the wait queue.
	 *
	 * Normally I should go at the end of the queue.  However, if I already
	 * hold locks that conflict with the request of any previous waiter,
	 * put myself in the queue just in front of the first such waiter.
	 * This is not a necessary step, since deadlock detection would move
620 621 622
	 * me to before that waiter anyway; but it's relatively cheap to
	 * detect such a conflict immediately, and avoid delaying till
	 * deadlock timeout.
623
	 *
624 625
	 * Special case: if I find I should go in front of some waiter, check to
	 * see if I conflict with already-held locks or the requests before
626 627
	 * that waiter.  If not, then just grant myself the requested lock
	 * immediately.  This is the same as the test for immediate grant in
628 629
	 * LockAcquire, except we are only considering the part of the wait
	 * queue before my insertion point.
630 631
	 */
	if (myHeldLocks != 0)
V
Vadim B. Mikheev 已提交
632
	{
633
		LOCKMASK	aheadRequests = 0;
634

J
Jan Wieck 已提交
635
		proc = (PGPROC *) MAKE_PTR(waitQueue->links.next);
636
		for (i = 0; i < waitQueue->size; i++)
V
Vadim B. Mikheev 已提交
637
		{
638
			/* Must he wait for me? */
B
Bruce Momjian 已提交
639
			if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
V
Vadim B. Mikheev 已提交
640
			{
641
				/* Must I wait for him ? */
B
Bruce Momjian 已提交
642
				if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks)
643
				{
644
					/*
645 646 647 648
					 * Yes, so we have a deadlock.	Easiest way to clean
					 * up correctly is to call RemoveFromWaitQueue(), but
					 * we can't do that until we are *on* the wait queue.
					 * So, set a flag to check below, and break out of
B
Bruce Momjian 已提交
649 650
					 * loop.  Also, record deadlock info for later
					 * message.
651
					 */
652
					RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
653 654
					early_deadlock = true;
					break;
655
				}
656
				/* I must go before this waiter.  Check special case. */
B
Bruce Momjian 已提交
657
				if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
658 659 660
					LockCheckConflicts(lockMethodTable,
									   lockmode,
									   lock,
661
									   proclock,
662
									   MyProc) == STATUS_OK)
663
				{
664
					/* Skip the wait and just grant myself the lock. */
665
					GrantLock(lock, proclock, lockmode);
666
					GrantAwaitedLock();
667
					return STATUS_OK;
668 669
				}
				/* Break out of loop to put myself before him */
V
Vadim B. Mikheev 已提交
670
				break;
671
			}
672
			/* Nope, so advance to next waiter */
673
			aheadRequests |= LOCKBIT_ON(proc->waitLockMode);
J
Jan Wieck 已提交
674
			proc = (PGPROC *) MAKE_PTR(proc->links.next);
V
Vadim B. Mikheev 已提交
675
		}
B
Bruce Momjian 已提交
676

677 678 679 680
		/*
		 * If we fall out of loop normally, proc points to waitQueue head,
		 * so we will insert at tail of queue as desired.
		 */
681 682 683 684
	}
	else
	{
		/* I hold no locks, so I can't push in front of anyone. */
J
Jan Wieck 已提交
685
		proc = (PGPROC *) &(waitQueue->links);
V
Vadim B. Mikheev 已提交
686
	}
687

688 689 690
	/*
	 * Insert self into queue, ahead of the given proc (or at tail of
	 * queue).
691
	 */
692
	SHMQueueInsertBefore(&(proc->links), &(MyProc->links));
B
Bruce Momjian 已提交
693
	waitQueue->size++;
694

695
	lock->waitMask |= LOCKBIT_ON(lockmode);
696

J
Jan Wieck 已提交
697
	/* Set up wait information in PGPROC object, too */
698
	MyProc->waitLock = lock;
699
	MyProc->waitProcLock = proclock;
700 701
	MyProc->waitLockMode = lockmode;

702
	MyProc->waitStatus = STATUS_ERROR;	/* initialize result for error */
703 704 705

	/*
	 * If we detected deadlock, give up without waiting.  This must agree
706
	 * with CheckDeadLock's recovery code, except that we shouldn't
707
	 * release the semaphore since we haven't tried to lock it yet.
708 709 710 711 712 713
	 */
	if (early_deadlock)
	{
		RemoveFromWaitQueue(MyProc);
		return STATUS_ERROR;
	}
714

715 716 717
	/* mark that we are waiting for a lock */
	waitingForLock = true;

718
	/*
719
	 * Release the locktable's masterLock.
720
	 *
721 722 723 724
	 * NOTE: this may also cause us to exit critical-section state, possibly
	 * allowing a cancel/die interrupt to be accepted. This is OK because
	 * we have recorded the fact that we are waiting for a lock, and so
	 * LockWaitCancel will clean up if cancel/die happens.
725
	 */
726
	LWLockRelease(masterLock);
727

728
	/*
729 730
	 * Set timer so we can wake up after awhile and check for a deadlock.
	 * If a deadlock is detected, the handler releases the process's
B
Bruce Momjian 已提交
731 732
	 * semaphore and sets MyProc->waitStatus = STATUS_ERROR, allowing us
	 * to know that we must report failure rather than success.
733 734 735
	 *
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
736
	 */
737
	if (!enable_sig_alarm(DeadlockTimeout, false))
738
		elog(FATAL, "could not set timer for process wakeup");
739

740
	/*
741
	 * If someone wakes us between LWLockRelease and PGSemaphoreLock,
B
Bruce Momjian 已提交
742
	 * PGSemaphoreLock will not block.	The wakeup is "saved" by the
743
	 * semaphore implementation.  Note also that if CheckDeadLock is
744
	 * invoked but does not detect a deadlock, PGSemaphoreLock() will
745 746
	 * continue to wait.  There used to be a loop here, but it was useless
	 * code...
747 748 749 750
	 *
	 * We pass interruptOK = true, which eliminates a window in which
	 * cancel/die interrupts would be held off undesirably.  This is a
	 * promise that we don't mind losing control to a cancel/die interrupt
751 752
	 * here.  We don't, because we have no shared-state-change work to do
	 * after being granted the lock (the grantor did it all).  We do have
753 754
	 * to worry about updating the locallock table, but if we lose control
	 * to an error, LockWaitCancel will fix that up.
755
	 */
756
	PGSemaphoreLock(&MyProc->sem, true);
757

758
	/*
759
	 * Disable the timer, if it's still running
B
Bruce Momjian 已提交
760
	 */
761
	if (!disable_sig_alarm(false))
762
		elog(FATAL, "could not disable timer for process wakeup");
B
Bruce Momjian 已提交
763

764
	/*
765 766
	 * Re-acquire the locktable's masterLock.  We have to do this to hold
	 * off cancel/die interrupts before we can mess with waitingForLock
767
	 * (else we might have a missed or duplicated locallock update).
768 769 770 771 772
	 */
	LWLockAcquire(masterLock, LW_EXCLUSIVE);

	/*
	 * We no longer want LockWaitCancel to do anything.
773 774 775
	 */
	waitingForLock = false;

776
	/*
777
	 * If we got the lock, be sure to remember it in the locallock table.
778
	 */
779
	if (MyProc->waitStatus == STATUS_OK)
780
		GrantAwaitedLock();
781

782 783 784 785
	/*
	 * We don't have to do anything else, because the awaker did all the
	 * necessary update of the lock table and MyProc.
	 */
786
	return MyProc->waitStatus;
787 788 789 790 791 792
}


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
793
 *	 Also remove the process from the wait queue and set its links invalid.
794
 *	 RETURN: the next process in the wait queue.
795 796 797 798
 *
 * XXX: presently, this code is only used for the "success" case, and only
 * works correctly for that case.  To clean up in failure case, would need
 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
799
 */
J
Jan Wieck 已提交
800
PGPROC *
801
ProcWakeup(PGPROC *proc, int waitStatus)
802
{
J
Jan Wieck 已提交
803
	PGPROC	   *retProc;
804

805
	/* assume that masterLock has been acquired */
806

807
	/* Proc should be sleeping ... */
808 809
	if (proc->links.prev == INVALID_OFFSET ||
		proc->links.next == INVALID_OFFSET)
810
		return NULL;
811

812
	/* Save next process before we zap the list link */
J
Jan Wieck 已提交
813
	retProc = (PGPROC *) MAKE_PTR(proc->links.next);
814

815
	/* Remove process from wait queue */
816
	SHMQueueDelete(&(proc->links));
817
	(proc->waitLock->waitProcs.size)--;
818

819 820
	/* Clean up process' state and pass it the ok/fail signal */
	proc->waitLock = NULL;
821
	proc->waitProcLock = NULL;
822
	proc->waitStatus = waitStatus;
823

824
	/* And awaken it */
825
	PGSemaphoreUnlock(&proc->sem);
826 827

	return retProc;
828 829 830 831
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
832 833
 *		released (or a prior waiter is aborted).  Scan all waiters
 *		for lock, waken any that are no longer blocked.
834
 */
835
void
836
ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
837
{
838 839
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
J
Jan Wieck 已提交
840
	PGPROC	   *proc;
841
	LOCKMASK	aheadRequests = 0;
M
 
Marc G. Fournier 已提交
842

843
	Assert(queue_size >= 0);
844

845 846
	if (queue_size == 0)
		return;
847

J
Jan Wieck 已提交
848
	proc = (PGPROC *) MAKE_PTR(waitQueue->links.next);
849

850 851
	while (queue_size-- > 0)
	{
B
Bruce Momjian 已提交
852
		LOCKMODE	lockmode = proc->waitLockMode;
M
 
Marc G. Fournier 已提交
853 854

		/*
855 856
		 * Waken if (a) doesn't conflict with requests of earlier waiters,
		 * and (b) doesn't conflict with already-held locks.
M
 
Marc G. Fournier 已提交
857
		 */
B
Bruce Momjian 已提交
858
		if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
859 860 861
			LockCheckConflicts(lockMethodTable,
							   lockmode,
							   lock,
862
							   proc->waitProcLock,
863
							   proc) == STATUS_OK)
M
 
Marc G. Fournier 已提交
864
		{
865
			/* OK to waken */
866
			GrantLock(lock, proc->waitProcLock, lockmode);
867
			proc = ProcWakeup(proc, STATUS_OK);
B
Bruce Momjian 已提交
868

869
			/*
B
Bruce Momjian 已提交
870 871 872
			 * ProcWakeup removes proc from the lock's waiting process
			 * queue and returns the next proc in chain; don't use proc's
			 * next-link, because it's been cleared.
873
			 */
M
 
Marc G. Fournier 已提交
874
		}
875
		else
876
		{
B
Bruce Momjian 已提交
877 878 879 880
			/*
			 * Cannot wake this guy. Remember his request for later
			 * checks.
			 */
881
			aheadRequests |= LOCKBIT_ON(lockmode);
J
Jan Wieck 已提交
882
			proc = (PGPROC *) MAKE_PTR(proc->links.next);
883
		}
M
 
Marc G. Fournier 已提交
884
	}
885 886

	Assert(waitQueue->size >= 0);
887 888 889
}

/* --------------------
890
 * We only get to this routine if we got SIGALRM after DeadlockTimeout
891 892 893 894
 * while waiting for a lock to be released by some other process.  Look
 * to see if there's a deadlock; if not, just return and continue waiting.
 * If we have a real deadlock, remove ourselves from the lock's wait queue
 * and signal an error to ProcSleep.
895 896
 * --------------------
 */
897
static void
898
CheckDeadLock(void)
899
{
900
	/*
901 902
	 * Acquire locktable lock.	Note that the deadlock check interrupt had
	 * better not be enabled anywhere that this process itself holds the
903
	 * locktable lock, else this will wait forever.  Also note that
904 905
	 * LWLockAcquire creates a critical section, so that this routine
	 * cannot be interrupted by cancel/die interrupts.
906
	 */
907
	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
908

909
	/*
910 911 912
	 * Check to see if we've been awoken by anyone in the interim.
	 *
	 * If we have we can return and resume our transaction -- happy day.
913 914
	 * Before we are awoken the process releasing the lock grants it to us
	 * so we know that we don't have to wait anymore.
915
	 *
916
	 * We check by looking to see if we've been unlinked from the wait queue.
917 918 919
	 * This is quicker than checking our semaphore's state, since no
	 * kernel call is needed, and it is safe because we hold the locktable
	 * lock.
920 921 922 923
	 */
	if (MyProc->links.prev == INVALID_OFFSET ||
		MyProc->links.next == INVALID_OFFSET)
	{
924
		LWLockRelease(LockMgrLock);
925 926 927
		return;
	}

928
#ifdef LOCK_DEBUG
B
Bruce Momjian 已提交
929 930
	if (Debug_deadlocks)
		DumpAllLocks();
931 932
#endif

933
	if (!DeadLockCheck(MyProc))
B
Bruce Momjian 已提交
934
	{
935
		/* No deadlock, so keep waiting */
936
		LWLockRelease(LockMgrLock);
B
Bruce Momjian 已提交
937 938 939
		return;
	}

940
	/*
941 942 943
	 * Oops.  We have a deadlock.
	 *
	 * Get this process out of wait state.
944
	 */
945 946
	RemoveFromWaitQueue(MyProc);

947
	/*
B
Bruce Momjian 已提交
948 949
	 * Set MyProc->waitStatus to STATUS_ERROR so that ProcSleep will
	 * report an error after we return from the signal handler.
950
	 */
951
	MyProc->waitStatus = STATUS_ERROR;
952

953 954 955
	/*
	 * Unlock my semaphore so that the interrupted ProcSleep() call can
	 * finish.
956
	 */
957
	PGSemaphoreUnlock(&MyProc->sem);
958

959 960 961 962 963 964 965 966 967
	/*
	 * We're done here.  Transaction abort caused by the error that
	 * ProcSleep will raise will cause any other locks we hold to be
	 * released, thus allowing other processes to wake up; we don't need
	 * to do that here. NOTE: an exception is that releasing locks we hold
	 * doesn't consider the possibility of waiters that were blocked
	 * behind us on the lock we just failed to get, and might now be
	 * wakable because we're not in front of them anymore.  However,
	 * RemoveFromWaitQueue took care of waking up any such processes.
968
	 */
969
	LWLockRelease(LockMgrLock);
970 971 972
}


973 974 975 976 977 978 979 980 981 982 983
/*
 * ProcWaitForSignal - wait for a signal from another backend.
 *
 * This can share the semaphore normally used for waiting for locks,
 * since a backend could never be waiting for a lock and a signal at
 * the same time.  As with locks, it's OK if the signal arrives just
 * before we actually reach the waiting state.
 */
void
ProcWaitForSignal(void)
{
984
	PGSemaphoreLock(&MyProc->sem, true);
985 986 987 988 989 990 991 992 993 994 995 996
}

/*
 * ProcCancelWaitForSignal - clean up an aborted wait for signal
 *
 * We need this in case the signal arrived after we aborted waiting,
 * or if it arrived but we never reached ProcWaitForSignal() at all.
 * Caller should call this after resetting the signal request status.
 */
void
ProcCancelWaitForSignal(void)
{
997
	PGSemaphoreReset(&MyProc->sem);
998 999 1000
}

/*
1001
 * ProcSendSignal - send a signal to a backend identified by PID
1002 1003
 */
void
1004
ProcSendSignal(int pid)
1005
{
1006
	PGPROC	   *proc = BackendPidGetProc(pid);
1007 1008

	if (proc != NULL)
1009
		PGSemaphoreUnlock(&proc->sem);
1010 1011 1012
}


1013 1014 1015 1016 1017 1018 1019 1020 1021
/*****************************************************************************
 * SIGALRM interrupt support
 *
 * Maybe these should be in pqsignal.c?
 *****************************************************************************/

/*
 * Enable the SIGALRM interrupt to fire after the specified delay
 *
1022
 * Delay is given in milliseconds.	Caller should be sure a SIGALRM
1023 1024
 * signal handler is installed before this is called.
 *
1025 1026
 * This code properly handles nesting of deadlock timeout alarms within
 * statement timeout alarms.
1027
 *
1028 1029 1030
 * Returns TRUE if okay, FALSE on failure.
 */
bool
1031
enable_sig_alarm(int delayms, bool is_statement_timeout)
1032
{
1033
	struct timeval fin_time;
B
Bruce Momjian 已提交
1034

1035
#ifndef __BEOS__
1036
	struct itimerval timeval;
B
Bruce Momjian 已提交
1037

1038
#else
1039
	bigtime_t	time_interval;
1040
#endif
1041

1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066
	/* Compute target timeout time if we will need it */
	if (is_statement_timeout || statement_timeout_active)
	{
		gettimeofday(&fin_time, NULL);
		fin_time.tv_sec += delayms / 1000;
		fin_time.tv_usec += (delayms % 1000) * 1000;
		if (fin_time.tv_usec >= 1000000)
		{
			fin_time.tv_sec++;
			fin_time.tv_usec -= 1000000;
		}
	}

	if (is_statement_timeout)
	{
		/* Begin statement-level timeout */
		Assert(!deadlock_timeout_active);
		statement_fin_time = fin_time;
		statement_timeout_active = true;
	}
	else if (statement_timeout_active)
	{
		/*
		 * Begin deadlock timeout with statement-level timeout active
		 *
B
Bruce Momjian 已提交
1067 1068 1069 1070
		 * Here, we want to interrupt at the closer of the two timeout times.
		 * If fin_time >= statement_fin_time then we need not touch the
		 * existing timer setting; else set up to interrupt at the
		 * deadlock timeout time.
1071 1072 1073
		 *
		 * NOTE: in this case it is possible that this routine will be
		 * interrupted by the previously-set timer alarm.  This is okay
B
Bruce Momjian 已提交
1074 1075 1076
		 * because the signal handler will do only what it should do
		 * according to the state variables.  The deadlock checker may get
		 * run earlier than normal, but that does no harm.
1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088
		 */
		deadlock_timeout_active = true;
		if (fin_time.tv_sec > statement_fin_time.tv_sec ||
			(fin_time.tv_sec == statement_fin_time.tv_sec &&
			 fin_time.tv_usec >= statement_fin_time.tv_usec))
			return true;
	}
	else
	{
		/* Begin deadlock timeout with no statement-level timeout */
		deadlock_timeout_active = true;
	}
1089

1090
	/* If we reach here, okay to set the timer interrupt */
1091
#ifndef __BEOS__
1092
	MemSet(&timeval, 0, sizeof(struct itimerval));
1093 1094
	timeval.it_value.tv_sec = delayms / 1000;
	timeval.it_value.tv_usec = (delayms % 1000) * 1000;
1095
	if (setitimer(ITIMER_REAL, &timeval, NULL))
1096 1097 1098
		return false;
#else
	/* BeOS doesn't have setitimer, but has set_alarm */
1099
	time_interval = delayms * 1000;		/* usecs */
1100
	if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0)
1101
		return false;
1102
#endif
1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123
	return true;
}

/*
 * Cancel the SIGALRM timer, either for a deadlock timeout or a statement
 * timeout.  If a deadlock timeout is canceled, any active statement timeout
 * remains in force.
 *
 * Returns TRUE if okay, FALSE on failure.
 */
bool
disable_sig_alarm(bool is_statement_timeout)
{
	/*
	 * Always disable the interrupt if it is active; this avoids being
	 * interrupted by the signal handler and thereby possibly getting
	 * confused.
	 *
	 * We will re-enable the interrupt if necessary in CheckStatementTimeout.
	 */
	if (statement_timeout_active || deadlock_timeout_active)
1124 1125
	{
#ifndef __BEOS__
1126
		struct itimerval timeval;
1127

1128
		MemSet(&timeval, 0, sizeof(struct itimerval));
1129
		if (setitimer(ITIMER_REAL, &timeval, NULL))
1130
		{
1131 1132 1133
			statement_timeout_active = deadlock_timeout_active = false;
			return false;
		}
1134
#else
1135 1136 1137 1138 1139
		/* BeOS doesn't have setitimer, but has set_alarm */
		if (set_alarm(B_INFINITE_TIMEOUT, B_PERIODIC_ALARM) < 0)
		{
			statement_timeout_active = deadlock_timeout_active = false;
			return false;
1140
		}
1141
#endif
1142 1143
	}

1144 1145 1146 1147
	/* Always cancel deadlock timeout, in case this is error cleanup */
	deadlock_timeout_active = false;

	/* Cancel or reschedule statement timeout */
1148
	if (is_statement_timeout)
1149 1150 1151 1152 1153 1154
		statement_timeout_active = false;
	else if (statement_timeout_active)
	{
		if (!CheckStatementTimeout())
			return false;
	}
1155 1156 1157
	return true;
}

1158

1159
/*
1160 1161 1162
 * Check for statement timeout.  If the timeout time has come,
 * trigger a query-cancel interrupt; if not, reschedule the SIGALRM
 * interrupt to occur at the right time.
1163
 *
1164
 * Returns true if okay, false if failed to set the interrupt.
1165
 */
1166 1167
static bool
CheckStatementTimeout(void)
1168
{
1169
	struct timeval now;
B
Bruce Momjian 已提交
1170

1171 1172 1173 1174
	if (!statement_timeout_active)
		return true;			/* do nothing if not active */

	gettimeofday(&now, NULL);
1175

1176 1177 1178
	if (now.tv_sec > statement_fin_time.tv_sec ||
		(now.tv_sec == statement_fin_time.tv_sec &&
		 now.tv_usec >= statement_fin_time.tv_usec))
1179
	{
1180 1181
		/* Time to die */
		statement_timeout_active = false;
1182
		kill(MyProcPid, SIGINT);
1183 1184 1185 1186
	}
	else
	{
		/* Not time yet, so (re)schedule the interrupt */
1187
#ifndef __BEOS__
1188 1189
		struct itimerval timeval;

1190
		MemSet(&timeval, 0, sizeof(struct itimerval));
1191 1192 1193 1194 1195 1196 1197 1198
		timeval.it_value.tv_sec = statement_fin_time.tv_sec - now.tv_sec;
		timeval.it_value.tv_usec = statement_fin_time.tv_usec - now.tv_usec;
		if (timeval.it_value.tv_usec < 0)
		{
			timeval.it_value.tv_sec--;
			timeval.it_value.tv_usec += 1000000;
		}
		if (setitimer(ITIMER_REAL, &timeval, NULL))
1199 1200 1201
			return false;
#else
		/* BeOS doesn't have setitimer, but has set_alarm */
1202
		bigtime_t	time_interval;
B
Bruce Momjian 已提交
1203

1204 1205 1206 1207
		time_interval =
			(statement_fin_time.tv_sec - now.tv_sec) * 1000000 +
			(statement_fin_time.tv_usec - now.tv_usec);
		if (set_alarm(time_interval, B_ONE_SHOT_RELATIVE_ALARM) < 0)
1208 1209 1210 1211
			return false;
#endif
	}

1212 1213
	return true;
}
1214 1215 1216


/*
1217 1218 1219 1220 1221 1222
 * Signal handler for SIGALRM
 *
 * Process deadlock check and/or statement timeout check, as needed.
 * To avoid various edge cases, we must be careful to do nothing
 * when there is nothing to be done.  We also need to be able to
 * reschedule the timer interrupt if called before end of statement.
1223 1224 1225 1226
 */
void
handle_sig_alarm(SIGNAL_ARGS)
{
1227 1228 1229
	int			save_errno = errno;

	if (deadlock_timeout_active)
1230
	{
1231
		deadlock_timeout_active = false;
1232 1233
		CheckDeadLock();
	}
1234 1235 1236 1237 1238

	if (statement_timeout_active)
		(void) CheckStatementTimeout();

	errno = save_errno;
1239
}