diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index b0aff28a596f05e19f1ca30e5d9089774ca3ed21..23bc2fb062990fb352563d725188251a030e2d94 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.69 2003/08/08 21:41:27 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.70 2003/08/10 19:48:08 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -409,6 +409,22 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) * that the page is still free. (For example, an already-free * page could have been re-used between the time the last VACUUM * scanned it and the time the VACUUM made its FSM updates.) + * + * In fact, it's worse than that: we can't even assume that it's + * safe to take a lock on the reported page. If somebody else + * has a lock on it, or even worse our own caller does, we could + * deadlock. (The own-caller scenario is actually not improbable. + * Consider an index on a serial or timestamp column. Nearly all + * splits will be at the rightmost page, so it's entirely likely + * that _bt_split will call us while holding a lock on the page most + * recently acquired from FSM. A VACUUM running concurrently with + * the previous split could well have placed that page back in FSM.) + * + * To get around that, we ask for only a conditional lock on the + * reported page. If we fail, then someone else is using the page, + * and we may reasonably assume it's not free. (If we happen to be + * wrong, the worst consequence is the page will be lost to use till + * the next VACUUM, which is no big problem.) */ for (;;) { @@ -416,16 +432,24 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) if (blkno == InvalidBlockNumber) break; buf = ReadBuffer(rel, blkno); - LockBuffer(buf, access); - page = BufferGetPage(buf); - if (_bt_page_recyclable(page)) + if (ConditionalLockBuffer(buf)) { - /* Okay to use page. Re-initialize and return it */ - _bt_pageinit(page, BufferGetPageSize(buf)); - return buf; + page = BufferGetPage(buf); + if (_bt_page_recyclable(page)) + { + /* Okay to use page. Re-initialize and return it */ + _bt_pageinit(page, BufferGetPageSize(buf)); + return buf; + } + elog(DEBUG2, "FSM returned nonrecyclable page"); + _bt_relbuf(rel, buf); + } + else + { + elog(DEBUG2, "FSM returned nonlockable page"); + /* couldn't get lock, so just drop pin */ + ReleaseBuffer(buf); } - elog(DEBUG2, "FSM returned nonrecyclable page"); - _bt_relbuf(rel, buf); } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 2da3b4e364d3968ae2cee01796f7666d15473ae3..1be30d30e2b6545ab3d008f8e952f9c520397f17 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.139 2003/08/04 02:40:03 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.140 2003/08/10 19:48:08 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1936,6 +1936,37 @@ LockBuffer(Buffer buffer, int mode) elog(ERROR, "unrecognized buffer lock mode: %d", mode); } +/* + * Acquire the cntx_lock for the buffer, but only if we don't have to wait. + * + * This assumes the caller wants BUFFER_LOCK_EXCLUSIVE mode. + */ +bool +ConditionalLockBuffer(Buffer buffer) +{ + BufferDesc *buf; + + Assert(BufferIsValid(buffer)); + if (BufferIsLocal(buffer)) + return true; /* act as though we got it */ + + buf = &(BufferDescriptors[buffer - 1]); + + if (LWLockConditionalAcquire(buf->cntx_lock, LW_EXCLUSIVE)) + { + /* + * This is not the best place to set cntxDirty flag (eg indices do + * not always change buffer they lock in excl mode). But please + * remember that it's critical to set cntxDirty *before* logging + * changes with XLogInsert() - see comments in BufferSync(). + */ + buf->cntxDirty = true; + + return true; + } + return false; +} + /* * LockBufferForCleanup - lock a buffer in preparation for deleting items * diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 5d78a3b9b2a6742b82c30c262a8b21b784d37cdb..bfbf64e207f5dc9b0f0eaca8edf9cd0f97acd9c9 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: bufmgr.h,v 1.69 2003/08/04 02:40:14 momjian Exp $ + * $Id: bufmgr.h,v 1.70 2003/08/10 19:48:08 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -180,6 +180,7 @@ extern void SetBufferCommitInfoNeedsSave(Buffer buffer); extern void UnlockBuffers(void); extern void LockBuffer(Buffer buffer, int mode); +extern bool ConditionalLockBuffer(Buffer buffer); extern void LockBufferForCleanup(Buffer buffer); extern void AbortBufferIO(void);