/*------------------------------------------------------------------------- * * freelist.c * routines for managing the buffer pool's replacement strategy. * * * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.56 2006/07/23 18:34:45 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" /* * The shared freelist control information. */ typedef struct { /* Clock sweep hand: index of next buffer to consider grabbing */ int nextVictimBuffer; int firstFreeBuffer; /* Head of list of unused buffers */ int lastFreeBuffer; /* Tail of list of unused buffers */ /* * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is, * when the list is empty) */ } BufferStrategyControl; /* Pointers to shared state */ static BufferStrategyControl *StrategyControl = NULL; /* Backend-local state about whether currently vacuuming */ bool strategy_hint_vacuum = false; /* * StrategyGetBuffer * * Called by the bufmgr to get the next candidate buffer to use in * BufferAlloc(). The only hard requirement BufferAlloc() has is that * the selected buffer must not currently be pinned by anyone. * * To ensure that no one else can pin the buffer before we do, we must * return the buffer with the buffer header spinlock still held. That * means that we return with the BufFreelistLock still held, as well; * the caller must release that lock once the spinlock is dropped. */ volatile BufferDesc * StrategyGetBuffer(void) { volatile BufferDesc *buf; int trycounter; LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); /* * Try to get a buffer from the freelist. Note that the freeNext fields * are considered to be protected by the BufFreelistLock not the * individual buffer spinlocks, so it's OK to manipulate them without * holding the spinlock. */ while (StrategyControl->firstFreeBuffer >= 0) { buf = &BufferDescriptors[StrategyControl->firstFreeBuffer]; Assert(buf->freeNext != FREENEXT_NOT_IN_LIST); /* Unconditionally remove buffer from freelist */ StrategyControl->firstFreeBuffer = buf->freeNext; buf->freeNext = FREENEXT_NOT_IN_LIST; /* * If the buffer is pinned or has a nonzero usage_count, we cannot use * it; discard it and retry. (This can only happen if VACUUM put a * valid buffer in the freelist and then someone else used it before * we got to it.) */ LockBufHdr(buf); if (buf->refcount == 0 && buf->usage_count == 0) return buf; UnlockBufHdr(buf); } /* Nothing on the freelist, so run the "clock sweep" algorithm */ trycounter = NBuffers; for (;;) { buf = &BufferDescriptors[StrategyControl->nextVictimBuffer]; if (++StrategyControl->nextVictimBuffer >= NBuffers) StrategyControl->nextVictimBuffer = 0; /* * If the buffer is pinned or has a nonzero usage_count, we cannot use * it; decrement the usage_count and keep scanning. */ LockBufHdr(buf); if (buf->refcount == 0 && buf->usage_count == 0) return buf; if (buf->usage_count > 0) { buf->usage_count--; trycounter = NBuffers; } else if (--trycounter == 0) { /* * We've scanned all the buffers without making any state changes, * so all the buffers are pinned (or were when we looked at them). * We could hope that someone will free one eventually, but it's * probably better to fail than to risk getting stuck in an * infinite loop. */ UnlockBufHdr(buf); elog(ERROR, "no unpinned buffers available"); } UnlockBufHdr(buf); } /* not reached */ return NULL; } /* * StrategyFreeBuffer: put a buffer on the freelist * * The buffer is added either at the head or the tail, according to the * at_head parameter. This allows a small amount of control over how * quickly the buffer is reused. */ void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head) { LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); /* * It is possible that we are told to put something in the freelist that * is already in it; don't screw up the list if so. */ if (buf->freeNext == FREENEXT_NOT_IN_LIST) { if (at_head) { buf->freeNext = StrategyControl->firstFreeBuffer; if (buf->freeNext < 0) StrategyControl->lastFreeBuffer = buf->buf_id; StrategyControl->firstFreeBuffer = buf->buf_id; } else { buf->freeNext = FREENEXT_END_OF_LIST; if (StrategyControl->firstFreeBuffer < 0) StrategyControl->firstFreeBuffer = buf->buf_id; else BufferDescriptors[StrategyControl->lastFreeBuffer].freeNext = buf->buf_id; StrategyControl->lastFreeBuffer = buf->buf_id; } } LWLockRelease(BufFreelistLock); } /* * StrategySyncStart -- tell BufferSync where to start syncing * * The result is the buffer index of the best buffer to sync first. * BufferSync() will proceed circularly around the buffer array from there. */ int StrategySyncStart(void) { int result; /* * We could probably dispense with the locking here, but just to be safe * ... */ LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); result = StrategyControl->nextVictimBuffer; LWLockRelease(BufFreelistLock); return result; } /* * StrategyHintVacuum -- tell us whether VACUUM is active */ void StrategyHintVacuum(bool vacuum_active) { strategy_hint_vacuum = vacuum_active; } /* * StrategyShmemSize * * estimate the size of shared memory used by the freelist-related structures. * * Note: for somewhat historical reasons, the buffer lookup hashtable size * is also determined here. */ Size StrategyShmemSize(void) { Size size = 0; /* size of lookup hash table ... see comment in StrategyInitialize */ size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS)); /* size of the shared replacement strategy control block */ size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl))); return size; } /* * StrategyInitialize -- initialize the buffer cache replacement * strategy. * * Assumes: All of the buffers are already built into a linked list. * Only called by postmaster and only during initialization. */ void StrategyInitialize(bool init) { bool found; /* * Initialize the shared buffer lookup hashtable. * * Since we can't tolerate running out of lookup table entries, we * must be sure to specify an adequate table size here. The maximum * steady-state usage is of course NBuffers entries, but BufferAlloc() * tries to insert a new entry before deleting the old. In principle * this could be happening in each partition concurrently, so we * could need as many as NBuffers + NUM_BUFFER_PARTITIONS entries. */ InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS); /* * Get or create the shared strategy control block */ StrategyControl = (BufferStrategyControl *) ShmemInitStruct("Buffer Strategy Status", sizeof(BufferStrategyControl), &found); if (!found) { /* * Only done once, usually in postmaster */ Assert(init); /* * Grab the whole linked list of free buffers for our strategy. We * assume it was previously set up by InitBufferPool(). */ StrategyControl->firstFreeBuffer = 0; StrategyControl->lastFreeBuffer = NBuffers - 1; /* Initialize the clock sweep pointer */ StrategyControl->nextVictimBuffer = 0; } else Assert(!init); }