提交 0fcc3c2f 编写于 作者: T Tom Lane

Repair a low-probability race condition identified by Qingqing Zhou.

If a process abandons a wait in LockBufferForCleanup (in practice,
only happens if someone cancels a VACUUM) just before someone else
sends it a signal indicating the buffer is available, it was possible
for the wakeup to remain in the process' semaphore, causing misbehavior
next time the process waited for an lmgr lock.  Rather than try to
prevent the race condition directly, it seems best to make the lock
manager robust against leftover wakeups, by having it repeat waiting
on the semaphore if the lock has not actually been granted or denied
yet.
上级 cc39aca7
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.206 2006/03/31 23:32:06 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.207 2006/04/14 03:38:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1726,8 +1726,6 @@ UnlockBuffers(void) ...@@ -1726,8 +1726,6 @@ UnlockBuffers(void)
if (buf) if (buf)
{ {
HOLD_INTERRUPTS(); /* don't want to die() partway through... */
LockBufHdr(buf); LockBufHdr(buf);
/* /*
...@@ -1740,11 +1738,7 @@ UnlockBuffers(void) ...@@ -1740,11 +1738,7 @@ UnlockBuffers(void)
UnlockBufHdr(buf); UnlockBufHdr(buf);
ProcCancelWaitForSignal();
PinCountWaitBuf = NULL; PinCountWaitBuf = NULL;
RESUME_INTERRUPTS();
} }
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.163 2006/03/05 15:58:38 momjian Exp $ * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.164 2006/04/14 03:38:55 tgl Exp $
* *
* NOTES * NOTES
* A lock table is a shared memory hash table. When * A lock table is a shared memory hash table. When
...@@ -1116,10 +1116,12 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner) ...@@ -1116,10 +1116,12 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
} }
/* /*
* Remove a proc from the wait-queue it is on * Remove a proc from the wait-queue it is on (caller must know it is on one).
* (caller must know it is on one). * This is only used when the proc has failed to get the lock, so we set its
* waitStatus to STATUS_ERROR.
* *
* Appropriate partition lock must be held by caller. * Appropriate partition lock must be held by caller. Also, caller is
* responsible for signaling the proc if needed.
* *
* NB: this does not clean up any locallock object that may exist for the lock. * NB: this does not clean up any locallock object that may exist for the lock.
*/ */
...@@ -1132,6 +1134,7 @@ RemoveFromWaitQueue(PGPROC *proc, int partition) ...@@ -1132,6 +1134,7 @@ RemoveFromWaitQueue(PGPROC *proc, int partition)
LOCKMETHODID lockmethodid = LOCK_LOCKMETHOD(*waitLock); LOCKMETHODID lockmethodid = LOCK_LOCKMETHOD(*waitLock);
/* Make sure proc is waiting */ /* Make sure proc is waiting */
Assert(proc->waitStatus == STATUS_WAITING);
Assert(proc->links.next != INVALID_OFFSET); Assert(proc->links.next != INVALID_OFFSET);
Assert(waitLock); Assert(waitLock);
Assert(waitLock->waitProcs.size > 0); Assert(waitLock->waitProcs.size > 0);
...@@ -1151,9 +1154,10 @@ RemoveFromWaitQueue(PGPROC *proc, int partition) ...@@ -1151,9 +1154,10 @@ RemoveFromWaitQueue(PGPROC *proc, int partition)
if (waitLock->granted[lockmode] == waitLock->requested[lockmode]) if (waitLock->granted[lockmode] == waitLock->requested[lockmode])
waitLock->waitMask &= LOCKBIT_OFF(lockmode); waitLock->waitMask &= LOCKBIT_OFF(lockmode);
/* Clean up the proc's own state */ /* Clean up the proc's own state, and pass it the ok/fail signal */
proc->waitLock = NULL; proc->waitLock = NULL;
proc->waitProcLock = NULL; proc->waitProcLock = NULL;
proc->waitStatus = STATUS_ERROR;
/* /*
* Delete the proclock immediately if it represents no already-held locks. * Delete the proclock immediately if it represents no already-held locks.
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.173 2006/03/05 15:58:39 momjian Exp $ * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.174 2006/04/14 03:38:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -267,7 +267,8 @@ InitProcess(void) ...@@ -267,7 +267,8 @@ InitProcess(void)
/* /*
* We might be reusing a semaphore that belonged to a failed process. So * We might be reusing a semaphore that belonged to a failed process. So
* be careful and reinitialize its value here. * be careful and reinitialize its value here. (This is not strictly
* necessary anymore, but seems like a good idea for cleanliness.)
*/ */
PGSemaphoreReset(&MyProc->sem); PGSemaphoreReset(&MyProc->sem);
...@@ -397,7 +398,8 @@ InitDummyProcess(void) ...@@ -397,7 +398,8 @@ InitDummyProcess(void)
/* /*
* We might be reusing a semaphore that belonged to a failed process. So * We might be reusing a semaphore that belonged to a failed process. So
* be careful and reinitialize its value here. * be careful and reinitialize its value here. (This is not strictly
* necessary anymore, but seems like a good idea for cleanliness.)
*/ */
PGSemaphoreReset(&MyProc->sem); PGSemaphoreReset(&MyProc->sem);
...@@ -465,7 +467,6 @@ LockWaitCancel(void) ...@@ -465,7 +467,6 @@ LockWaitCancel(void)
if (MyProc->links.next != INVALID_OFFSET) if (MyProc->links.next != INVALID_OFFSET)
{ {
/* We could not have been granted the lock yet */ /* We could not have been granted the lock yet */
Assert(MyProc->waitStatus == STATUS_ERROR);
RemoveFromWaitQueue(MyProc, lockAwaited->partition); RemoveFromWaitQueue(MyProc, lockAwaited->partition);
} }
else else
...@@ -485,15 +486,14 @@ LockWaitCancel(void) ...@@ -485,15 +486,14 @@ LockWaitCancel(void)
LWLockRelease(partitionLock); LWLockRelease(partitionLock);
/* /*
* Reset the proc wait semaphore to zero. This is necessary in the * We used to do PGSemaphoreReset() here to ensure that our proc's wait
* scenario where someone else granted us the lock we wanted before we * semaphore is reset to zero. This prevented a leftover wakeup signal
* were able to remove ourselves from the wait-list. The semaphore will * from remaining in the semaphore if someone else had granted us the
* have been bumped to 1 by the would-be grantor, and since we are no * lock we wanted before we were able to remove ourselves from the
* longer going to wait on the sema, we have to force it back to zero. * wait-list. However, now that ProcSleep loops until waitStatus changes,
* Otherwise, our next attempt to wait for a lock will fall through * a leftover wakeup signal isn't harmful, and it seems not worth
* prematurely. * expending cycles to get rid of a signal that most likely isn't there.
*/ */
PGSemaphoreReset(&MyProc->sem);
/* /*
* Return true even if we were kicked off the lock before we were able to * Return true even if we were kicked off the lock before we were able to
...@@ -767,7 +767,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -767,7 +767,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
MyProc->waitProcLock = proclock; MyProc->waitProcLock = proclock;
MyProc->waitLockMode = lockmode; MyProc->waitLockMode = lockmode;
MyProc->waitStatus = STATUS_ERROR; /* initialize result for error */ MyProc->waitStatus = STATUS_WAITING;
/* /*
* If we detected deadlock, give up without waiting. This must agree with * If we detected deadlock, give up without waiting. This must agree with
...@@ -808,9 +808,11 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -808,9 +808,11 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
/* /*
* If someone wakes us between LWLockRelease and PGSemaphoreLock, * If someone wakes us between LWLockRelease and PGSemaphoreLock,
* PGSemaphoreLock will not block. The wakeup is "saved" by the semaphore * PGSemaphoreLock will not block. The wakeup is "saved" by the semaphore
* implementation. Note also that if CheckDeadLock is invoked but does * implementation. While this is normally good, there are cases where
* not detect a deadlock, PGSemaphoreLock() will continue to wait. There * a saved wakeup might be leftover from a previous operation (for
* used to be a loop here, but it was useless code... * example, we aborted ProcWaitForSignal just before someone did
* ProcSendSignal). So, loop to wait again if the waitStatus shows
* we haven't been granted nor denied the lock yet.
* *
* We pass interruptOK = true, which eliminates a window in which * We pass interruptOK = true, which eliminates a window in which
* cancel/die interrupts would be held off undesirably. This is a promise * cancel/die interrupts would be held off undesirably. This is a promise
...@@ -820,7 +822,9 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -820,7 +822,9 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
* updating the locallock table, but if we lose control to an error, * updating the locallock table, but if we lose control to an error,
* LockWaitCancel will fix that up. * LockWaitCancel will fix that up.
*/ */
PGSemaphoreLock(&MyProc->sem, true); do {
PGSemaphoreLock(&MyProc->sem, true);
} while (MyProc->waitStatus == STATUS_WAITING);
/* /*
* Disable the timer, if it's still running * Disable the timer, if it's still running
...@@ -865,6 +869,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -865,6 +869,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
* XXX: presently, this code is only used for the "success" case, and only * XXX: presently, this code is only used for the "success" case, and only
* works correctly for that case. To clean up in failure case, would need * works correctly for that case. To clean up in failure case, would need
* to twiddle the lock's request counts too --- see RemoveFromWaitQueue. * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
* Hence, in practice the waitStatus parameter must be STATUS_OK.
*/ */
PGPROC * PGPROC *
ProcWakeup(PGPROC *proc, int waitStatus) ProcWakeup(PGPROC *proc, int waitStatus)
...@@ -875,6 +880,7 @@ ProcWakeup(PGPROC *proc, int waitStatus) ...@@ -875,6 +880,7 @@ ProcWakeup(PGPROC *proc, int waitStatus)
if (proc->links.prev == INVALID_OFFSET || if (proc->links.prev == INVALID_OFFSET ||
proc->links.next == INVALID_OFFSET) proc->links.next == INVALID_OFFSET)
return NULL; return NULL;
Assert(proc->waitStatus == STATUS_WAITING);
/* Save next process before we zap the list link */ /* Save next process before we zap the list link */
retProc = (PGPROC *) MAKE_PTR(proc->links.next); retProc = (PGPROC *) MAKE_PTR(proc->links.next);
...@@ -1014,16 +1020,13 @@ CheckDeadLock(void) ...@@ -1014,16 +1020,13 @@ CheckDeadLock(void)
* efficiently by relying on lockAwaited, but use this coding to preserve * efficiently by relying on lockAwaited, but use this coding to preserve
* the flexibility to kill some other transaction than the one detecting * the flexibility to kill some other transaction than the one detecting
* the deadlock.) * the deadlock.)
*
* RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
* ProcSleep will report an error after we return from the signal handler.
*/ */
Assert(MyProc->waitLock != NULL); Assert(MyProc->waitLock != NULL);
RemoveFromWaitQueue(MyProc, LockTagToPartition(&(MyProc->waitLock->tag))); RemoveFromWaitQueue(MyProc, LockTagToPartition(&(MyProc->waitLock->tag)));
/*
* Set MyProc->waitStatus to STATUS_ERROR so that ProcSleep will report an
* error after we return from the signal handler.
*/
MyProc->waitStatus = STATUS_ERROR;
/* /*
* Unlock my semaphore so that the interrupted ProcSleep() call can * Unlock my semaphore so that the interrupted ProcSleep() call can
* finish. * finish.
...@@ -1058,7 +1061,10 @@ check_done: ...@@ -1058,7 +1061,10 @@ check_done:
* This can share the semaphore normally used for waiting for locks, * This can share the semaphore normally used for waiting for locks,
* since a backend could never be waiting for a lock and a signal at * since a backend could never be waiting for a lock and a signal at
* the same time. As with locks, it's OK if the signal arrives just * the same time. As with locks, it's OK if the signal arrives just
* before we actually reach the waiting state. * before we actually reach the waiting state. Also as with locks,
* it's necessary that the caller be robust against bogus wakeups:
* always check that the desired state has occurred, and wait again
* if not. This copes with possible "leftover" wakeups.
*/ */
void void
ProcWaitForSignal(void) ProcWaitForSignal(void)
...@@ -1066,19 +1072,6 @@ ProcWaitForSignal(void) ...@@ -1066,19 +1072,6 @@ ProcWaitForSignal(void)
PGSemaphoreLock(&MyProc->sem, true); PGSemaphoreLock(&MyProc->sem, true);
} }
/*
* ProcCancelWaitForSignal - clean up an aborted wait for signal
*
* We need this in case the signal arrived after we aborted waiting,
* or if it arrived but we never reached ProcWaitForSignal() at all.
* Caller should call this after resetting the signal request status.
*/
void
ProcCancelWaitForSignal(void)
{
PGSemaphoreReset(&MyProc->sem);
}
/* /*
* ProcSendSignal - send a signal to a backend identified by PID * ProcSendSignal - send a signal to a backend identified by PID
*/ */
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/c.h,v 1.199 2006/03/05 15:58:52 momjian Exp $ * $PostgreSQL: pgsql/src/include/c.h,v 1.200 2006/04/14 03:38:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -710,6 +710,7 @@ typedef NameData *Name; ...@@ -710,6 +710,7 @@ typedef NameData *Name;
#define STATUS_ERROR (-1) #define STATUS_ERROR (-1)
#define STATUS_EOF (-2) #define STATUS_EOF (-2)
#define STATUS_FOUND (1) #define STATUS_FOUND (1)
#define STATUS_WAITING (2)
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.87 2006/03/05 15:59:00 momjian Exp $ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.88 2006/04/14 03:38:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -61,7 +61,7 @@ struct PGPROC ...@@ -61,7 +61,7 @@ struct PGPROC
SHM_QUEUE links; /* list link if process is in a list */ SHM_QUEUE links; /* list link if process is in a list */
PGSemaphoreData sem; /* ONE semaphore to sleep on */ PGSemaphoreData sem; /* ONE semaphore to sleep on */
int waitStatus; /* STATUS_OK or STATUS_ERROR after wakeup */ int waitStatus; /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */
TransactionId xid; /* transaction currently being executed by TransactionId xid; /* transaction currently being executed by
* this proc */ * this proc */
...@@ -147,7 +147,6 @@ extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); ...@@ -147,7 +147,6 @@ extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
extern bool LockWaitCancel(void); extern bool LockWaitCancel(void);
extern void ProcWaitForSignal(void); extern void ProcWaitForSignal(void);
extern void ProcCancelWaitForSignal(void);
extern void ProcSendSignal(int pid); extern void ProcSendSignal(int pid);
extern bool enable_sig_alarm(int delayms, bool is_statement_timeout); extern bool enable_sig_alarm(int delayms, bool is_statement_timeout);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册