/*-------------------------------------------------------------------------
 *
 * buf_internals.h
 *	  Internal definitions for buffer manager and the buffer replacement
 *	  strategy.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.89 2007/01/05 22:19:57 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef BUFMGR_INTERNALS_H
#define BUFMGR_INTERNALS_H

#include "storage/buf.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "utils/relcache.h"
#include "utils/rel.h"


/*
 * Flags for buffer descriptors
 *
 * Note: TAG_VALID essentially means that there is a buffer hashtable
 * entry associated with the buffer's tag.
 */
#define BM_DIRTY				(1 << 0)		/* data needs writing */
#define BM_VALID				(1 << 1)		/* data is valid */
#define BM_TAG_VALID			(1 << 2)		/* tag is assigned */
#define BM_IO_IN_PROGRESS		(1 << 3)		/* read or write in progress */
#define BM_IO_ERROR				(1 << 4)		/* previous I/O failed */
#define BM_JUST_DIRTIED			(1 << 5)		/* dirtied since write started */
#define BM_PIN_COUNT_WAITER		(1 << 6)		/* have waiter for sole pin */
// unused #define BM_CHECKPOINT_NEEDED	(1 << 7)		/* must write for checkpoint */

typedef bits16 BufFlags;

/*
 * The maximum allowed value of usage_count represents a tradeoff between
 * accuracy and speed of the clock-sweep buffer management algorithm.  A
 * large value (comparable to NBuffers) would approximate LRU semantics.
 * But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of
 * clock sweeps to find a free buffer, so in practice we don't want the
 * value to be very large.
 */
#define BM_MAX_USAGE_COUNT	5
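/*
 * Illustrative sketch (an assumption about freelist.c, not text from this
 * header): when the clock-sweep hand passes an unpinned buffer it does
 * roughly
 *
 *		if (buf->usage_count > 0)
 *			buf->usage_count--;		(recently used; skip this buffer)
 *		else
 *			return buf;				(candidate victim found)
 *
 * so a buffer becomes eligible for replacement at most BM_MAX_USAGE_COUNT
 * complete sweeps after it was last accessed.
 */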

/*
 * Buffer tag identifies which disk block the buffer contains.
 *
 * Note: the BufferTag data must be sufficient to determine where to write the
 * block, without reference to pg_class or pg_tablespace entries.  It's
 * possible that the backend flushing the buffer doesn't even believe the
 * relation is visible yet (its xact may have started before the xact that
 * created the rel).  The storage manager must be able to cope anyway.
 *
 * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
 * to be fixed to zero them, since this struct is used as a hash key.
 */
typedef struct buftag
{
	RelFileNode rnode;			/* physical relation identifier */
	BlockNumber blockNum;		/* blknum relative to begin of reln */
} BufferTag;

#define CLEAR_BUFFERTAG(a) \
( \
	(a).rnode.spcNode = InvalidOid, \
	(a).rnode.dbNode = InvalidOid, \
	(a).rnode.relNode = InvalidOid, \
	(a).blockNum = InvalidBlockNumber \
)

#define INIT_BUFFERTAG(a,xx_reln,xx_blockNum) \
( \
	(a).rnode = (xx_reln)->rd_node, \
	(a).blockNum = (xx_blockNum) \
)

#define BUFFERTAGS_EQUAL(a,b) \
( \
	RelFileNodeEquals((a).rnode, (b).rnode) && \
	(a).blockNum == (b).blockNum \
)
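/*
 * Usage sketch (illustrative only, not part of the original header):
 * bufmgr.c typically builds a tag for the block it wants, compares it
 * against a buffer's current tag, and clears the tag when invalidating
 * the buffer:
 *
 *		BufferTag	newTag;
 *
 *		INIT_BUFFERTAG(newTag, reln, blockNum);
 *		if (BUFFERTAGS_EQUAL(newTag, buf->tag))
 *			... buffer already contains the desired block ...
 *
 *		CLEAR_BUFFERTAG(buf->tag);		(when dropping the buffer's identity)
 */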

/*
 * The shared buffer mapping table is partitioned to reduce contention.
 * To determine which partition lock a given tag requires, compute the tag's
 * hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
 * NB: NUM_BUFFER_PARTITIONS must be a power of 2!
 */
#define BufTableHashPartition(hashcode) \
	((hashcode) % NUM_BUFFER_PARTITIONS)
#define BufMappingPartitionLock(hashcode) \
	((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode)))
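/*
 * Example (a hedged sketch, not code from this header) of the lookup
 * pattern described above, as a caller in bufmgr.c might write it:
 *
 *		BufferTag	tag;
 *		uint32		hash;
 *		LWLockId	partitionLock;
 *		int			buf_id;
 *
 *		INIT_BUFFERTAG(tag, reln, blockNum);
 *		hash = BufTableHashCode(&tag);
 *		partitionLock = BufMappingPartitionLock(hash);
 *		LWLockAcquire(partitionLock, LW_SHARED);
 *		buf_id = BufTableLookup(&tag, hash);
 *		LWLockRelease(partitionLock);
 *
 * buf_id is then an index into BufferDescriptors, or -1 if the block is
 * not currently present in a shared buffer.
 */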

/*
 *	BufferDesc -- shared descriptor/state data for a single shared buffer.
 *
 * Note: buf_hdr_lock must be held to examine or change the tag, flags,
 * usage_count, refcount, or wait_backend_pid fields.  buf_id field never
 * changes after initialization, so does not need locking.	freeNext is
 * protected by the BufFreelistLock not buf_hdr_lock.  The LWLocks can take
 * care of themselves.	The buf_hdr_lock is *not* used to control access to
 * the data in the buffer!
 *
 * An exception is that if we have the buffer pinned, its tag can't change
 * underneath us, so we can examine the tag without locking the spinlock.
 * Also, in places we do one-time reads of the flags without bothering to
 * lock the spinlock; this is generally for situations where we don't expect
 * the flag bit being tested to be changing.
 *
 * We can't physically remove items from a disk page if another backend has
 * the buffer pinned.  Hence, a backend may need to wait for all other pins
 * to go away.	This is signaled by storing its own PID into
 * wait_backend_pid and setting flag bit BM_PIN_COUNT_WAITER.  At present,
 * there can be only one such waiter per buffer.
 *
 * We use this same struct for local buffer headers, but the lock fields
 * are not used and not all of the flag bits are useful either.
 */
typedef struct sbufdesc
{
	BufferTag	tag;			/* ID of page contained in buffer */
	BufFlags	flags;			/* see bit definitions above */
	uint16		usage_count;	/* usage counter for clock sweep code */
	unsigned	refcount;		/* # of backends holding pins on buffer */
	int			wait_backend_pid;		/* backend PID of pin-count waiter */

	slock_t		buf_hdr_lock;	/* protects the above fields */

	int			buf_id;			/* buffer's index number (from 0) */
	int			freeNext;		/* link in freelist chain */

	LWLockId	io_in_progress_lock;	/* to wait for I/O to complete */
	LWLockId	content_lock;	/* to lock access to buffer contents */
} BufferDesc;
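/*
 * Illustrative sketch (not from the original header) of the pin-count
 * waiter protocol described above, roughly as LockBufferForCleanup() in
 * bufmgr.c uses it:
 *
 *		LockBufHdr(bufHdr);
 *		if (bufHdr->refcount == 1)
 *			... we hold the only pin; safe to clean up the page ...
 *		else
 *		{
 *			bufHdr->wait_backend_pid = MyProcPid;
 *			bufHdr->flags |= BM_PIN_COUNT_WAITER;
 *			UnlockBufHdr(bufHdr);
 *			... sleep until the other pins are released ...
 *		}
 */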

#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)

/*
 * The freeNext field is either the index of the next freelist entry,
 * or one of these special values:
 */
#define FREENEXT_END_OF_LIST	(-1)
#define FREENEXT_NOT_IN_LIST	(-2)

/*
 * Macros for acquiring/releasing a shared buffer header's spinlock.
 * Do not apply these to local buffers!
 *
 * Note: as a general coding rule, if you are using these then you probably
 * need to be using a volatile-qualified pointer to the buffer header, to
 * ensure that the compiler doesn't rearrange accesses to the header to
 * occur before or after the spinlock is acquired/released.
 */
#define LockBufHdr(bufHdr)		SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
#define UnlockBufHdr(bufHdr)	SpinLockRelease(&(bufHdr)->buf_hdr_lock)
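/*
 * Example (illustrative only): per the coding rule above, callers access
 * the header through a volatile-qualified pointer while holding the
 * spinlock, e.g.
 *
 *		volatile BufferDesc *buf = &BufferDescriptors[buf_id];
 *
 *		LockBufHdr(buf);
 *		buf->refcount++;
 *		UnlockBufHdr(buf);
 */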


/* in buf_init.c */
extern PGDLLIMPORT volatile BufferDesc *BufferDescriptors;

/* in localbuf.c */
extern BufferDesc *LocalBufferDescriptors;

/* in freelist.c */
extern bool strategy_hint_vacuum;

/* event counters in buf_init.c */
extern long int ReadBufferCount;
extern long int ReadLocalBufferCount;
extern long int BufferHitCount;
extern long int LocalBufferHitCount;
extern long int BufferFlushCount;
extern long int LocalBufferFlushCount;


/*
 * Internal routines: only called by bufmgr
 */

/* freelist.c */
extern volatile BufferDesc *StrategyGetBuffer(void);
extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
extern int	StrategySyncStart(void);
extern Size StrategyShmemSize(void);
extern void StrategyInitialize(bool init);

/* buf_table.c */
extern Size BufTableShmemSize(int size);
extern void InitBufTable(int size);
extern uint32 BufTableHashCode(BufferTag *tagPtr);
extern int	BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
extern int	BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);

/* localbuf.c */
extern BufferDesc *LocalBufferAlloc(SMgrRelation reln, BlockNumber blockNum,
				 bool *foundPtr);
extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelFileNodeLocalBuffers(RelFileNode rnode,
							BlockNumber firstDelBlock);
extern void AtEOXact_LocalBuffers(bool isCommit);

#endif   /* BUFMGR_INTERNALS_H */