提交 6b86d62b 编写于 作者: J Jan Wieck

2nd try for the ARC strategy.

I added a couple more Assertions while tracking down the exact
cause of the former bug.

All 93 regression tests pass now.

Jan
上级 923e994d
......@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.267 2003/11/13 05:34:57 wieck Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.268 2003/11/13 14:57:15 wieck Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -33,6 +33,7 @@
#include "commands/vacuum.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "storage/buf_internals.h"
#include "storage/freespace.h"
#include "storage/sinval.h"
#include "storage/smgr.h"
......@@ -310,8 +311,16 @@ vacuum(VacuumStmt *vacstmt)
else
old_context = MemoryContextSwitchTo(anl_context);
/*
* Tell the buffer replacement strategy that vacuum is
* causing the IO
*/
StrategyHintVacuum(true);
analyze_rel(relid, vacstmt);
StrategyHintVacuum(false);
if (vacstmt->vacuum)
CommitTransactionCommand();
else
......@@ -749,6 +758,12 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
SetQuerySnapshot(); /* might be needed for functions in
* indexes */
/*
* Tell the cache replacement strategy that vacuum is causing
* all following IO
*/
StrategyHintVacuum(true);
/*
* Check for user-requested abort. Note we want this to be inside a
* transaction, so xact.c doesn't issue useless WARNING.
......@@ -763,6 +778,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
ObjectIdGetDatum(relid),
0, 0, 0))
{
StrategyHintVacuum(false);
CommitTransactionCommand();
return true; /* okay 'cause no data there */
}
......@@ -796,6 +812,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
(errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
RelationGetRelationName(onerel))));
relation_close(onerel, lmode);
StrategyHintVacuum(false);
CommitTransactionCommand();
return false;
}
......@@ -810,6 +827,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
(errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables",
RelationGetRelationName(onerel))));
relation_close(onerel, lmode);
StrategyHintVacuum(false);
CommitTransactionCommand();
return false;
}
......@@ -824,6 +842,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
if (isOtherTempNamespace(RelationGetNamespace(onerel)))
{
relation_close(onerel, lmode);
StrategyHintVacuum(false);
CommitTransactionCommand();
return true; /* assume no long-lived data in temp
* tables */
......@@ -863,6 +882,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
/*
* Complete the transaction and free all temporary memory used.
*/
StrategyHintVacuum(false);
CommitTransactionCommand();
/*
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.56 2003/11/13 05:34:58 wieck Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.57 2003/11/13 14:57:15 wieck Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -48,9 +48,6 @@ long *CurTraceBuf;
int ShowPinTrace = 0;
int Data_Descriptors;
int Free_List_Descriptor;
int Lookup_List_Descriptor;
int Num_Descriptors;
BufferDesc *BufferDescriptors;
Block *BufferBlockPointers;
......@@ -133,9 +130,6 @@ InitBufferPool(void)
int i;
Data_Descriptors = NBuffers;
Free_List_Descriptor = Data_Descriptors;
Lookup_List_Descriptor = Data_Descriptors + 1;
Num_Descriptors = Data_Descriptors + 1;
/*
* It's probably not really necessary to grab the lock --- if there's
......@@ -156,7 +150,7 @@ InitBufferPool(void)
BufferDescriptors = (BufferDesc *)
ShmemInitStruct("Buffer Descriptors",
Num_Descriptors * sizeof(BufferDesc), &foundDescs);
Data_Descriptors * sizeof(BufferDesc), &foundDescs);
BufferBlocks = (char *)
ShmemInitStruct("Buffer Blocks",
......@@ -176,16 +170,14 @@ InitBufferPool(void)
block = BufferBlocks;
/*
* link the buffers into a circular, doubly-linked list to
* initialize free list, and initialize the buffer headers. Still
* don't know anything about replacement strategy in this file.
* link the buffers into a single linked list. This will become the
* LIFO list of unused buffers returned by StrategyGetBuffer().
*/
for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
{
Assert(ShmemIsValid((unsigned long) block));
buf->freeNext = i + 1;
buf->freePrev = i - 1;
buf->bufNext = i + 1;
CLEAR_BUFFERTAG(&(buf->tag));
buf->buf_id = i;
......@@ -199,14 +191,12 @@ InitBufferPool(void)
buf->wait_backend_id = 0;
}
/* close the circular queue */
BufferDescriptors[0].freePrev = Data_Descriptors - 1;
BufferDescriptors[Data_Descriptors - 1].freeNext = 0;
/* Correct last entry */
BufferDescriptors[Data_Descriptors - 1].bufNext = -1;
}
/* Init other shared buffer-management stuff */
InitBufTable();
InitFreeList(!foundDescs);
StrategyInitialize(!foundDescs);
LWLockRelease(BufMgrLock);
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.31 2003/11/13 05:34:58 wieck Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.32 2003/11/13 14:57:15 wieck Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -38,7 +38,7 @@ static HTAB *SharedBufHash;
* Initialize shmem hash table for mapping buffers
*/
void
InitBufTable(void)
InitBufTable(int size)
{
HASHCTL info;
......@@ -50,7 +50,7 @@ InitBufTable(void)
info.hash = tag_hash;
SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
NBuffers, NBuffers,
size, size,
&info,
HASH_ELEM | HASH_FUNCTION);
......@@ -58,79 +58,63 @@ InitBufTable(void)
elog(FATAL, "could not initialize shared buffer hash table");
}
BufferDesc *
/*
* BufTableLookup
*/
int
BufTableLookup(BufferTag *tagPtr)
{
BufferLookupEnt *result;
if (tagPtr->blockNum == P_NEW)
return NULL;
return -1;
result = (BufferLookupEnt *)
hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL);
if (!result)
return NULL;
return -1;
return &(BufferDescriptors[result->id]);
return result->id;
}
/*
* BufTableDelete
*/
bool
BufTableDelete(BufferDesc *buf)
BufTableInsert(BufferTag *tagPtr, Buffer buf_id)
{
BufferLookupEnt *result;
/*
* buffer not initialized or has been removed from table already.
* BM_DELETED keeps us from removing buffer twice.
*/
if (buf->flags & BM_DELETED)
return TRUE;
buf->flags |= BM_DELETED;
bool found;
result = (BufferLookupEnt *)
hash_search(SharedBufHash, (void *) &(buf->tag), HASH_REMOVE, NULL);
hash_search(SharedBufHash, (void *) tagPtr, HASH_ENTER, &found);
if (!result) /* shouldn't happen */
elog(ERROR, "shared buffer hash table corrupted");
if (!result)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
/*
* Clear the buffer's tag. This doesn't matter for the hash table,
* since the buffer is already removed from it, but it ensures that
* sequential searches through the buffer table won't think the buffer
* is still valid for its old page.
*/
buf->tag.rnode.relNode = InvalidOid;
buf->tag.rnode.tblNode = InvalidOid;
if (found) /* found something else in the table? */
elog(ERROR, "shared buffer hash table corrupted");
result->id = buf_id;
return TRUE;
}
/*
* BufTableDelete
*/
bool
BufTableInsert(BufferDesc *buf)
BufTableDelete(BufferTag *tagPtr)
{
BufferLookupEnt *result;
bool found;
/* cannot insert it twice */
Assert(buf->flags & BM_DELETED);
buf->flags &= ~(BM_DELETED);
result = (BufferLookupEnt *)
hash_search(SharedBufHash, (void *) &(buf->tag), HASH_ENTER, &found);
if (!result)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
hash_search(SharedBufHash, (void *) tagPtr, HASH_REMOVE, NULL);
if (found) /* found something else in the table? */
if (!result) /* shouldn't happen */
elog(ERROR, "shared buffer hash table corrupted");
result->id = buf->buf_id;
return TRUE;
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.143 2003/11/13 05:34:58 wieck Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.144 2003/11/13 14:57:15 wieck Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -260,12 +260,8 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
if (status == SM_FAIL)
{
/* IO Failed. cleanup the data structures and go home */
StrategyInvalidateBuffer(bufHdr);
if (!BufTableDelete(bufHdr))
{
LWLockRelease(BufMgrLock);
elog(FATAL, "buffer table broken after I/O error");
}
/* remember that BufferAlloc() pinned the buffer */
UnpinBuffer(bufHdr);
......@@ -318,7 +314,7 @@ BufferAlloc(Relation reln,
INIT_BUFFERTAG(&newTag, reln, blockNum);
/* see if the block is in the buffer pool already */
buf = BufTableLookup(&newTag);
buf = StrategyBufferLookup(&newTag, false);
if (buf != NULL)
{
/*
......@@ -379,7 +375,7 @@ BufferAlloc(Relation reln,
inProgress = FALSE;
for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)
{
buf = GetFreeBuffer();
buf = StrategyGetBuffer();
/* StrategyGetBuffer will abort if it can't find a free buffer */
Assert(buf);
......@@ -492,7 +488,7 @@ BufferAlloc(Relation reln,
* we haven't gotten around to insert the new tag into the
* buffer table. So we need to check here. -ay 3/95
*/
buf2 = BufTableLookup(&newTag);
buf2 = StrategyBufferLookup(&newTag, true);
if (buf2 != NULL)
{
/*
......@@ -535,29 +531,12 @@ BufferAlloc(Relation reln,
*/
/*
* Change the name of the buffer in the lookup table:
*
* Need to update the lookup table before the read starts. If someone
* comes along looking for the buffer while we are reading it in, we
* don't want them to allocate a new buffer. For the same reason, we
* didn't want to erase the buf table entry for the buffer we were
* writing back until now, either.
* Tell the buffer replacement strategy that we are replacing the
* buffer content. Then rename the buffer.
*/
if (!BufTableDelete(buf))
{
LWLockRelease(BufMgrLock);
elog(FATAL, "buffer wasn't in the buffer hash table");
}
StrategyReplaceBuffer(buf, reln, blockNum);
INIT_BUFFERTAG(&(buf->tag), reln, blockNum);
if (!BufTableInsert(buf))
{
LWLockRelease(BufMgrLock);
elog(FATAL, "buffer in buffer hash table twice");
}
/*
* Buffer contents are currently invalid. Have to mark IO IN PROGRESS
* so no one fiddles with them until the read completes. If this
......@@ -709,13 +688,28 @@ BufferSync(void)
BufferDesc *bufHdr;
ErrorContextCallback errcontext;
int num_buffer_dirty;
int *buffer_dirty;
/* Setup error traceback support for ereport() */
errcontext.callback = buffer_write_error_callback;
errcontext.arg = NULL;
errcontext.previous = error_context_stack;
error_context_stack = &errcontext;
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
/*
* Get a list of all currently dirty buffers and how many there are.
* We do not flush buffers that get dirtied after we started. They
* have to wait until the next checkpoint.
*/
buffer_dirty = (int *)palloc(NBuffers * sizeof(int));
num_buffer_dirty = 0;
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
num_buffer_dirty = StrategyDirtyBufferList(buffer_dirty, NBuffers);
LWLockRelease(BufMgrLock);
for (i = 0; i < num_buffer_dirty; i++)
{
Buffer buffer;
int status;
......@@ -723,10 +717,11 @@ BufferSync(void)
XLogRecPtr recptr;
Relation reln;
errcontext.arg = bufHdr;
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
bufHdr = &BufferDescriptors[buffer_dirty[i]];
errcontext.arg = bufHdr;
if (!(bufHdr->flags & BM_VALID))
{
LWLockRelease(BufMgrLock);
......@@ -855,6 +850,8 @@ BufferSync(void)
RelationDecrementReferenceCount(reln);
}
pfree(buffer_dirty);
/* Pop the error context stack */
error_context_stack = errcontext.previous;
}
......@@ -959,9 +956,9 @@ AtEOXact_Buffers(bool isCommit)
if (isCommit)
elog(WARNING,
"buffer refcount leak: [%03d] (freeNext=%d, freePrev=%d, "
"buffer refcount leak: [%03d] (bufNext=%d, "
"rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)",
i, buf->freeNext, buf->freePrev,
i, buf->bufNext,
buf->tag.rnode.tblNode, buf->tag.rnode.relNode,
buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i]);
......@@ -1229,7 +1226,7 @@ recheck:
/*
* And mark the buffer as no longer occupied by this rel.
*/
BufTableDelete(bufHdr);
StrategyInvalidateBuffer(bufHdr);
}
}
......@@ -1295,7 +1292,7 @@ recheck:
/*
* And mark the buffer as no longer occupied by this page.
*/
BufTableDelete(bufHdr);
StrategyInvalidateBuffer(bufHdr);
}
}
......@@ -1543,7 +1540,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
return -2;
}
if (bufHdr->tag.blockNum >= firstDelBlock)
BufTableDelete(bufHdr);
StrategyInvalidateBuffer(bufHdr);
}
}
......
......@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.168 2003/11/13 05:34:58 wieck Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.169 2003/11/13 14:57:15 wieck Exp $
*
*--------------------------------------------------------------------
*/
......@@ -73,6 +73,7 @@ extern int CheckPointTimeout;
extern int CommitDelay;
extern int CommitSiblings;
extern char *preload_libraries_string;
extern int BufferStrategyStatInterval;
#ifdef HAVE_SYSLOG
extern char *Syslog_facility;
......@@ -1190,6 +1191,15 @@ static struct config_int ConfigureNamesInt[] =
-1, -1, INT_MAX / 1000, NULL, NULL
},
{
{"buffer_strategy_status_interval", PGC_POSTMASTER, RESOURCES_MEM,
gettext_noop("Interval to report buffer strategy status in seconds"),
NULL
},
&BufferStrategyStatInterval,
0, 0, 600, NULL, NULL
},
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL
......
......@@ -58,6 +58,7 @@
#shared_buffers = 1000 # min 16, at least max_connections*2, 8KB each
#sort_mem = 1024 # min 64, size in KB
#vacuum_mem = 8192 # min 1024, size in KB
#buffer_strategy_status_interval = 0 # 0-600 seconds
# - Free Space Map -
......
......@@ -12,7 +12,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: miscadmin.h,v 1.136 2003/11/13 05:34:58 wieck Exp $
* $Id: miscadmin.h,v 1.137 2003/11/13 14:57:15 wieck Exp $
*
* NOTES
* some of the information in this file should be moved to
......@@ -96,6 +96,13 @@ extern void ProcessInterrupts(void);
CritSectionCount--; \
} while(0)
/*
 * PG_DELAY
 *		Sleep for roughly _msec milliseconds by calling select() with no
 *		file descriptors and a timeout.
 *
 * _msec must be an integer expression; it is evaluated exactly once.
 * The result of select() is deliberately ignored, so the delay may end
 * early (for example when select() is interrupted) without any report
 * to the caller.
 *
 * The body is wrapped in do { ... } while(0) so that the macro behaves
 * like a single statement: "if (x) PG_DELAY(n); else ..." now compiles,
 * which the earlier bare-brace form did not allow.  Callers must supply
 * the trailing semicolon.  The locals carry a pg_delay_ prefix so that an
 * argument that happens to be named "delay" is not captured.
 */
#define PG_DELAY(_msec) \
do { \
	long		pg_delay_msec_ = (_msec); \
	struct timeval pg_delay_tv_; \
	pg_delay_tv_.tv_sec = pg_delay_msec_ / 1000; \
	pg_delay_tv_.tv_usec = (pg_delay_msec_ % 1000) * 1000; \
	(void) select(0, NULL, NULL, NULL, &pg_delay_tv_); \
} while(0)
/*****************************************************************************
* globals.h -- *
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: buf_internals.h,v 1.63 2003/11/13 05:34:58 wieck Exp $
* $Id: buf_internals.h,v 1.64 2003/11/13 14:57:15 wieck Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -72,17 +72,29 @@ typedef struct buftag
(a)->rnode = (xx_reln)->rd_node \
)
/*
 * BUFFERTAG_EQUALS
 *		True when the buffer tag pointed to by "a" names block xx_blockNum
 *		of relation xx_reln: the tag's rnode.tblNode and rnode.relNode are
 *		compared with the relation's rd_node, and blockNum with xx_blockNum.
 *
 * Note that "a" and xx_reln appear more than once in the expansion, so
 * arguments with side effects should be avoided.
 */
#define BUFFERTAG_EQUALS(a,xx_reln,xx_blockNum) \
( \
(a)->rnode.tblNode == (xx_reln)->rd_node.tblNode && \
(a)->rnode.relNode == (xx_reln)->rd_node.relNode && \
(a)->blockNum == (xx_blockNum) \
)
/*
 * BUFFERTAGS_EQUAL
 *		True when the two buffer tags pointed to by "a" and "b" agree in
 *		rnode.tblNode, rnode.relNode and blockNum.
 *
 * Both arguments appear more than once in the expansion, so arguments
 * with side effects should be avoided.
 */
#define BUFFERTAGS_EQUAL(a,b) \
( \
(a)->rnode.tblNode == (b)->rnode.tblNode && \
(a)->rnode.relNode == (b)->rnode.relNode && \
(a)->blockNum == (b)->blockNum \
)
/*
* BufferDesc -- shared buffer cache metadata for a single
* shared buffer descriptor.
*/
typedef struct sbufdesc
{
Buffer freeNext; /* links for freelist chain */
Buffer freePrev;
Buffer bufNext; /* link in freelist chain */
SHMEM_OFFSET data; /* pointer to data in buf pool */
/* tag and id must be together for table lookup (still true?) */
/* tag and id must be together for table lookup */
BufferTag tag; /* file/block identifier */
int buf_id; /* buffer's index number (from 0) */
......@@ -107,6 +119,7 @@ typedef struct sbufdesc
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
/*
* Each backend has its own BufferLocks[] array holding flag bits
* showing what locks it has set on each buffer.
......@@ -167,14 +180,19 @@ extern long int LocalBufferFlushCount;
/*freelist.c*/
extern void PinBuffer(BufferDesc *buf);
extern void UnpinBuffer(BufferDesc *buf);
extern BufferDesc *GetFreeBuffer(void);
extern void InitFreeList(bool init);
extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck);
extern BufferDesc *StrategyGetBuffer(void);
extern void StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum);
extern void StrategyInvalidateBuffer(BufferDesc *buf);
extern void StrategyHintVacuum(bool vacuum_active);
extern int StrategyDirtyBufferList(int *buffer_dirty, int max_buffers);
extern void StrategyInitialize(bool init);
/* buf_table.c */
extern void InitBufTable(void);
extern BufferDesc *BufTableLookup(BufferTag *tagPtr);
extern bool BufTableDelete(BufferDesc *buf);
extern bool BufTableInsert(BufferDesc *buf);
extern void InitBufTable(int size);
extern int BufTableLookup(BufferTag *tagPtr);
extern bool BufTableInsert(BufferTag *tagPtr, Buffer buf_id);
extern bool BufTableDelete(BufferTag *tagPtr);
/* bufmgr.c */
extern BufferDesc *BufferDescriptors;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册