/*-------------------------------------------------------------------------
 *
 * nbtree.h--
 *    header file for postgres btree access method implementation.
 *
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 * $Id: nbtree.h,v 1.10 1997/03/18 18:41:16 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef	NBTREE_H
#define	NBTREE_H

#include <access/sdir.h>
#include <access/relscan.h>
#include <storage/itemid.h>
#include <storage/page.h>
#include <access/funcindex.h>
#include <access/itup.h>
#include <storage/buf.h>
#include <storage/itemptr.h>

/*
 *  BTPageOpaqueData -- At the end of every page, we store a pointer
 *  to both siblings in the tree.  See Lehman and Yao's paper for more
 *  info.  In addition, we need to know what sort of page this is
 *  (leaf or internal), and whether the page is available for reuse.
 *
 *  Lehman and Yao's algorithm requires a ``high key'' on every page.
 *  The high key on a page is guaranteed to be greater than or equal
 *  to any key that appears on this page.  Our insertion algorithm
 *  guarantees that we can use the initial least key on our right
 *  sibling as the high key.  We allocate space for the line pointer
 *  to the high key in the opaque data at the end of the page.
 *
 *  Rightmost pages in the tree have no high key.
 */

/*
 * NOTE(review): the comment above mentions space for the high key's line
 * pointer in the opaque data, but no such member is declared here; P_HIKEY
 * further down in this header suggests the high key is item 1 on the page
 * instead -- confirm against nbtpage.c / nbtinsert.c.
 */
typedef struct BTPageOpaqueData {
    BlockNumber	btpo_prev;	/* left sibling's block number; P_NONE if none (see P_LEFTMOST) */
    BlockNumber	btpo_next;	/* right sibling's block number; P_NONE if none (see P_RIGHTMOST) */
    uint16	btpo_flags;	/* page kind / status bits; presumably an OR of BTP_* below */

/* Flag bits, declared next to btpo_flags (presumably its bit values). */
#define BTP_LEAF	(1 << 0)	/* leaf page, as opposed to internal */
#define BTP_ROOT	(1 << 1)	/* presumably marks the root page -- confirm */
#define BTP_FREE	(1 << 2)	/* presumably "page is available for reuse" -- confirm */
#define BTP_META	(1 << 3)	/* presumably marks the metadata page -- confirm */

} BTPageOpaqueData;

/* Pointer alias for BTPageOpaqueData. */
typedef BTPageOpaqueData	*BTPageOpaque;

/*
 *  ScanOpaqueData is used to remember which buffers we're currently
 *  examining in the scan.  We keep these buffers locked and pinned
 *  and recorded in the opaque entry of the scan in order to avoid
 *  doing a ReadBuffer() for every tuple in the index.  This avoids
 *  semop() calls, which are expensive.
 *
 *  It is also used to remember the actual scankey info (we need it
 *  if some scankeys are evaluated at runtime).
 */

typedef struct BTScanOpaqueData {
    Buffer	btso_curbuf;
    Buffer	btso_mrkbuf;
    uint16	qual_ok;		/* 0 for quals like key == 1 && key > 2 */
    uint16	numberOfKeys;		/* number of key attributes */
71
    uint16	numberOfFirstKeys;	/* number of first key attributes */
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
    ScanKey	keyData;		/* key descriptor */
} BTScanOpaqueData;

typedef BTScanOpaqueData	*BTScanOpaque;

/*
 *  BTItems are what we store in the btree.  Each item has an index
 *  tuple, including key and pointer values.  In addition, we must
 *  guarantee that all tuples in the index are unique, in order to
 *  satisfy some assumptions in Lehman and Yao.  The way that we do
 *  this is by generating a new OID for every insertion that we do in
 *  the tree.  This adds eight bytes to the size of btree index
 *  tuples.  Note that we do not use the OID as part of a composite
 *  key; the OID only serves as a unique identifier for a given index
 *  tuple (logical position within a page).
 */

typedef struct BTItemData {
    Oid				bti_oid;	/* OID generated per insertion; makes every item unique */
    int32			bti_dummy;	/* padding to make bti_itup
						 * align at 8-byte boundary
						 */
    IndexTupleData		bti_itup;	/* the index tuple (key and pointer values) */
} BTItemData;

/* Pointer alias for BTItemData. */
typedef BTItemData	*BTItem;

/*
 *  BTStackData -- As we descend a tree, we push the (key, pointer)
 *  pairs from internal nodes onto a private stack.  If we split a
 *  leaf, we use this stack to walk back up the tree and insert data
 *  into parent nodes (and possibly to split them, too).  Lehman and
 *  Yao's update algorithm guarantees that under no circumstances can
 *  our private stack give us an irredeemably bad picture up the tree.
 *  Again, see the paper for details.
 */

typedef struct BTStackData {
    BlockNumber		bts_blkno;	/* block number recorded for this stack entry */
    OffsetNumber	bts_offset;	/* offset number recorded for this stack entry */
    BTItem		bts_btitem;	/* presumably the (key, pointer) item taken from the internal page -- confirm */
    struct BTStackData	*bts_parent;	/* next entry up the stack; presumably NULL at the top -- confirm */
} BTStackData;

/* Pointer alias for BTStackData. */
typedef BTStackData	*BTStack;

/*
 *  BTPageState -- a buffer/page pair plus bookkeeping, chained to further
 *  BTPageState records through btps_next.
 *
 *  NOTE(review): in this header only the nbtsort.c prototype
 *  _bt_uppershutdown() names this type, so it presumably carries per-level
 *  state while an index is being built -- confirm in nbtsort.c.
 */
typedef struct BTPageState {
    Buffer		btps_buf;	/* buffer associated with this state */
    Page		btps_page;	/* page associated with this state; presumably the one held in btps_buf -- confirm */
    BTItem		btps_lastbti;	/* presumably the last item added to the page -- confirm */
    OffsetNumber	btps_lastoff;	/* presumably the offset of that last item -- confirm */
    OffsetNumber	btps_firstoff;	/* presumably the offset of the first item -- confirm */
    int			btps_level;	/* presumably the tree level this state describes -- confirm */
    bool		btps_doupper;	/* presumably whether upper levels are built too -- confirm */
    struct BTPageState	*btps_next;	/* link to another BTPageState record */
} BTPageState;

/*
 *  We need to be able to tell the difference between read and write
 *  requests for pages, in order to do locking correctly.
 */

#define	BT_READ		0	/* page is requested for reading */
#define	BT_WRITE	1	/* page is requested for writing */

/*
 *  Similarly, the difference between insertion and non-insertion binary
 *  searches on a given page makes a difference when we're descending the
 *  tree.
 */

#define BT_INSERTION	0	/* binary search performed for an insertion */
#define BT_DESCENT	1	/* presumably the non-insertion search used while descending -- confirm */

/*
 *  We must classify index modification types for the benefit of
 *  _bt_adjscans.
 */
#define BT_INSERT	0	/* index modification was an insertion */
#define	BT_DELETE	1	/* index modification was a deletion */

/*
 *  In general, the btree code tries to localize its knowledge about
 *  page layout to a couple of routines.  However, we need a special
 *  value to indicate "no page number" in those places where we expect
 *  page numbers.
 */

#define P_NONE		0	/* "no page number" sentinel */
/* True when the sibling link in the given page opaque data equals P_NONE. */
#define	P_LEFTMOST(opaque)	((opaque)->btpo_prev == P_NONE)
#define	P_RIGHTMOST(opaque)	((opaque)->btpo_next == P_NONE)

/* Item offsets; presumably the high key's slot and the first real key's slot -- confirm. */
#define	P_HIKEY		((OffsetNumber) 1)
#define	P_FIRSTKEY	((OffsetNumber) 2)

/*
 *  Strategy numbers -- ordering of these is <, <=, =, >=, > 
 */

#define BTLessStrategyNumber		1	/* <  */
#define BTLessEqualStrategyNumber	2	/* <= */
#define BTEqualStrategyNumber		3	/* =  */
#define BTGreaterEqualStrategyNumber	4	/* >= */
#define BTGreaterStrategyNumber		5	/* >  */
#define BTMaxStrategyNumber		5	/* highest strategy number defined above */

/*
 *  When a new operator class is declared, we require that the user
 *  supply us with an amproc procedure for determining whether, for
 *  two keys a and b, a < b, a = b, or a > b.  This routine must
 *  return < 0, 0, > 0, respectively, in these three cases.  Since we
 *  only have one such proc in amproc, it's number 1.
 */

#define BTORDER_PROC	1	/* amproc number of the three-way key comparison procedure */


/*
 * prototypes for functions in nbtinsert.c
 */
extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem,
193
				      bool index_is_unique, Relation heapRel);
194 195

				/* default is to allow duplicates */
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
extern bool _bt_itemcmp(Relation rel, Size keysz, BTItem item1, BTItem item2,
			StrategyNumber strat);

/*
 * prototypes for functions in nbtpage.c
 */
/*
 * Page- and buffer-level helpers.  Wherever an "int access" parameter
 * appears it presumably takes BT_READ or BT_WRITE as defined earlier in
 * this header -- confirm in nbtpage.c.
 */
extern void _bt_metapinit(Relation rel);
extern void _bt_checkmeta(Relation rel);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
extern void _bt_relbuf(Relation rel, Buffer buf, int access);
extern void _bt_wrtbuf(Relation rel, Buffer buf);
extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
extern void _bt_metaproot(Relation rel, BlockNumber rootbknum);
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
extern void _bt_setpagelock(Relation rel, BlockNumber blkno, int access);
extern void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access);
extern void _bt_pagedel(Relation rel, ItemPointer tid);

/*
 * prototypes for functions in nbtree.c
 */
extern bool BuildingBtree;	/* in nbtree.c */

extern void btbuild(Relation heap, Relation index, int natts,
	AttrNumber *attnum, IndexStrategy istrat, uint16 pcount,
	Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo);
extern InsertIndexResult btinsert(Relation rel, Datum *datum, char *nulls,
225
				  ItemPointer ht_ctid, Relation heapRel);
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
extern char *btgettuple(IndexScanDesc scan, ScanDirection dir);
extern char *btbeginscan(Relation rel, bool fromEnd, uint16 keysz,
			 ScanKey scankey);

extern void btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey);
extern void btmovescan(IndexScanDesc scan, Datum v);
extern void btendscan(IndexScanDesc scan);
extern void btmarkpos(IndexScanDesc scan);
extern void btrestrpos(IndexScanDesc scan);
extern void btdelete(Relation rel, ItemPointer tid);

/*
 * prototypes for functions in nbtscan.c
 */
extern void _bt_regscan(IndexScanDesc scan);
extern void _bt_dropscan(IndexScanDesc scan);
242
extern void _bt_adjscans(Relation rel, ItemPointer tid, int op);
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257

/*
 * prototypes for functions in nbtsearch.c
 */
extern BTStack _bt_search(Relation rel, int keysz, ScanKey scankey,
			  Buffer *bufP);
extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
			    ScanKey scankey, int access);
extern bool _bt_skeycmp(Relation rel, Size keysz, ScanKey scankey,
			Page page, ItemId itemid, StrategyNumber strat);
extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
				ScanKey scankey, int srchtype);
extern RetrieveIndexResult _bt_next(IndexScanDesc scan, ScanDirection dir);
extern RetrieveIndexResult _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
258
extern int _bt_compare(Relation rel, TupleDesc itupdesc, Page page, int keysz, ScanKey scankey, OffsetNumber offnum);
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273

/*
 * prototypes for functions in nbtstrat.c
 */
extern StrategyNumber _bt_getstrat(Relation rel, AttrNumber attno,
				   RegProcedure proc);
extern bool _bt_invokestrat(Relation rel, AttrNumber attno,
			    StrategyNumber strat, Datum left, Datum right);

/*
 * prototypes for functions in nbtutils.c
 */
extern ScanKey  _bt_mkscankey(Relation rel, IndexTuple itup);
extern void _bt_freeskey(ScanKey skey);
extern void _bt_freestack(BTStack stack);
274
extern void _bt_orderkeys(Relation relation, BTScanOpaque so);
275
extern bool _bt_checkqual(IndexScanDesc scan, IndexTuple itup);
276
extern bool _bt_checkforkeys(IndexScanDesc scan, IndexTuple itup, Size keysz);
277 278 279 280 281
extern BTItem _bt_formitem(IndexTuple itup);

/*
 * prototypes for functions in nbtsort.c
 */
/*
 * Spool and page-state routines from nbtsort.c.  The spool and page state
 * are passed around as opaque void pointers, except _bt_uppershutdown,
 * which takes a typed BTPageState pointer.
 *
 * Stray line-number residue that had been interleaved with these
 * declarations was removed; the signatures themselves are unchanged.
 */
extern void *_bt_spoolinit(Relation index, int ntapes, bool isunique);
extern void *_bt_pagestate(Relation index, int flags, int level, bool doupper);
extern BTItem _bt_minitem(Page opage, BlockNumber oblkno, int atend);
extern BTItem _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags);
extern void _bt_uppershutdown(Relation index, BTPageState *state);
extern void _bt_spooldestroy(void *spool);
extern void _bt_spool(Relation index, BTItem btitem, void *spool);
extern void _bt_upperbuild(Relation index);
extern void _bt_leafbuild(Relation index, void *spool);

#endif	/* NBTREE_H */