/*-------------------------------------------------------------------------
 *
 * nbtree.h--
 *    header file for postgres btree access method implementation.
 *
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 * $Id: nbtree.h,v 1.14 1997/08/19 21:37:35 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef	NBTREE_H
#define	NBTREE_H

#include <access/sdir.h>
#include <access/relscan.h>
#include <storage/itemid.h>
#include <storage/page.h>
#include <access/funcindex.h>
#include <access/itup.h>
#include <storage/buf.h>
#include <storage/itemptr.h>

/*
 *  BTPageOpaqueData -- At the end of every page, we store a pointer
 *  to both siblings in the tree.  See Lehman and Yao's paper for more
 *  info.  In addition, we need to know what sort of page this is
 *  (leaf or internal), and whether the page is available for reuse.
 *
 *  Lehman and Yao's algorithm requires a ``high key'' on every page.
 *  The high key on a page is guaranteed to be greater than or equal
 *  to any key that appears on this page.  Our insertion algorithm
 *  guarantees that we can use the initial least key on our right
 *  sibling as the high key.  We allocate space for the line pointer
 *  to the high key in the opaque data at the end of the page.
 *
 *  Rightmost pages in the tree have no high key.
 */

/*
 *  Per-page btree bookkeeping: links to the two sibling pages plus a
 *  word of BTP_* status bits.
 */
typedef struct BTPageOpaqueData {
    BlockNumber	btpo_prev;	/* left sibling; P_NONE when there is none
				 * (tested by P_LEFTMOST) */
    BlockNumber	btpo_next;	/* right sibling; P_NONE when there is none
				 * (tested by P_RIGHTMOST) */
    uint16	btpo_flags;	/* holds the BTP_* bits defined below */

/* Bit values for btpo_flags; each occupies a distinct bit. */
#define BTP_LEAF	(1 << 0)
#define BTP_ROOT	(1 << 1)
#define BTP_FREE	(1 << 2)
#define BTP_META	(1 << 3)
#define BTP_CHAIN	(1 << 4)

} BTPageOpaqueData;

/* Pointer form of BTPageOpaqueData. */
typedef BTPageOpaqueData	*BTPageOpaque;

/*
 *  BTScanOpaqueData is used to remember which buffers we're currently
 *  examining in the scan.  We keep these buffers locked and pinned
 *  and recorded in the opaque entry of the scan in order to avoid
 *  doing a ReadBuffer() for every tuple in the index.  This avoids
 *  semop() calls, which are expensive.
 *
 *  It is also used to remember the actual scankey info, which we
 *  need if some scankeys are evaluated at runtime.
 */

typedef struct BTScanOpaqueData {
    Buffer	btso_curbuf;
    Buffer	btso_mrkbuf;
    uint16	qual_ok;		/* 0 for quals like key == 1 && key > 2 */
71 72
    uint16	numberOfKeys;		/* number of keys */
    uint16	numberOfFirstKeys;	/* number of keys for 1st attribute */
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
    ScanKey	keyData;		/* key descriptor */
} BTScanOpaqueData;

typedef BTScanOpaqueData	*BTScanOpaque;

/*
 *  BTItems are what we store in the btree.  Each item has an index
 *  tuple, including key and pointer values.  In addition, we must
 *  guarantee that all tuples in the index are unique, in order to
 *  satisfy some assumptions in Lehman and Yao.  The way that we do
 *  this is by generating a new OID for every insertion that we do in
 *  the tree.  This adds eight bytes to the size of btree index
 *  tuples.  Note that we do not use the OID as part of a composite
 *  key; the OID only serves as a unique identifier for a given index
 *  tuple (logical position within a page).
 *
 *  New comments:
 *  actually, we must guarantee that all tuples in A LEVEL
 *  are unique, not in THE WHOLE INDEX. So, we can use bti_itup.t_tid
 *  as unique identifier for a given index tuple (logical position
 *  within a level).	- vadim 04/09/97
 */

typedef struct BTItemData {
97
#ifndef BTREE_VERSION_1
98 99 100 101
    Oid				bti_oid;
    int32			bti_dummy;	/* padding to make bti_itup
						 * align at 8-byte boundary
						 */
102
#endif
103 104 105 106 107
    IndexTupleData		bti_itup;
} BTItemData;

typedef BTItemData	*BTItem;

/*
 *  BTItemSame -- true when two BTItem pointers identify the same item.
 *
 *  With BTREE_VERSION_1 defined, the bi_hi, bi_lo and ip_posid
 *  components of each item's bti_itup.t_tid are compared; otherwise the
 *  bti_oid fields are compared.
 *
 *  i1 and i2 are pointer expressions.  Each is parenthesized in the
 *  expansion so that arguments such as "items + 1" bind correctly.  The
 *  BTREE_VERSION_1 form evaluates each argument more than once, so do
 *  not pass arguments with side effects.
 */
#ifdef BTREE_VERSION_1
#define BTItemSame(i1, i2)    ( (i1)->bti_itup.t_tid.ip_blkid.bi_hi == \
				(i2)->bti_itup.t_tid.ip_blkid.bi_hi && \
				(i1)->bti_itup.t_tid.ip_blkid.bi_lo == \
				(i2)->bti_itup.t_tid.ip_blkid.bi_lo && \
				(i1)->bti_itup.t_tid.ip_posid == \
				(i2)->bti_itup.t_tid.ip_posid )
#else
#define BTItemSame(i1, i2)    ( (i1)->bti_oid == (i2)->bti_oid )
#endif

/*
 *  BTStackData -- As we descend a tree, we push the (key, pointer)
 *  pairs from internal nodes onto a private stack.  If we split a
 *  leaf, we use this stack to walk back up the tree and insert data
 *  into parent nodes (and possibly to split them, too).  Lehman and
 *  Yao's update algorithm guarantees that under no circumstances can
 *  our private stack give us an irredeemably bad picture up the tree.
 *  Again, see the paper for details.
 */

/* One entry of the private descent stack; entries are linked by bts_parent. */
typedef struct BTStackData {
    /* presumably the block number of the internal page visited -- confirm */
    BlockNumber		bts_blkno;
    /* presumably the offset on that page of the item followed -- confirm */
    OffsetNumber	bts_offset;
    /* presumably the (key, pointer) item recorded for that page -- confirm */
    BTItem		bts_btitem;
    /* link to another stack entry; by its name, the entry one level up */
    struct BTStackData	*bts_parent;
} BTStackData;

/* Pointer form of BTStackData. */
typedef BTStackData	*BTStack;

138 139 140 141 142 143 144 145 146 147 148
typedef struct BTPageState {
    Buffer		btps_buf;
    Page		btps_page;
    BTItem		btps_lastbti;
    OffsetNumber	btps_lastoff;
    OffsetNumber	btps_firstoff;
    int			btps_level;
    bool		btps_doupper;
    struct BTPageState	*btps_next;
} BTPageState;

/*
 *  We need to be able to tell the difference between read and write
 *  requests for pages, in order to do locking correctly.
 */

#define BT_READ		0	/* read request for a page */
#define BT_WRITE	1	/* write request for a page */

/*
 *  Similarly, the difference between insertion and non-insertion binary
 *  searches on a given page makes a difference when we're descending the
 *  tree.
 */

/* Search-type codes: insertion search versus non-insertion search. */
/* NOTE(review): presumably passed as the srchtype argument of _bt_binsrch -- confirm. */
#define BT_INSERTION	0
#define BT_DESCENT	1

/*
 *  We must classify index modification types for the benefit of
 *  _bt_adjscans.
 */
#define BT_INSERT	0	/* index modification type: insertion */
#define BT_DELETE	1	/* index modification type: deletion */

/*
 *  In general, the btree code tries to localize its knowledge about
 *  page layout to a couple of routines.  However, we need a special
 *  value to indicate "no page number" in those places where we expect
 *  page numbers.
 */

/* The "no page number" value. */
#define P_NONE		0

/* True when the opaque data pointed to by opaque has no left sibling. */
#define P_LEFTMOST(opaque)	((opaque)->btpo_prev == P_NONE)

/* True when the opaque data pointed to by opaque has no right sibling. */
#define P_RIGHTMOST(opaque)	((opaque)->btpo_next == P_NONE)

/*
 * Fixed item offsets on a page.
 * NOTE(review): by their names, the high key slot and the first real
 * key slot -- confirm against the page-layout code.
 */
#define P_HIKEY		((OffsetNumber) 1)
#define P_FIRSTKEY	((OffsetNumber) 2)

/*
 *  Strategy numbers -- ordering of these is <, <=, =, >=, > 
 */

#define BTLessStrategyNumber		1	/* <  */
#define BTLessEqualStrategyNumber	2	/* <= */
#define BTEqualStrategyNumber		3	/* =  */
#define BTGreaterEqualStrategyNumber	4	/* >= */
#define BTGreaterStrategyNumber		5	/* >  */
#define BTMaxStrategyNumber		5	/* largest strategy number above */

/*
 *  When a new operator class is declared, we require that the user
 *  supply us with an amproc procedure for determining whether, for
 *  two keys a and b, a < b, a = b, or a > b.  This routine must
 *  return < 0, 0, > 0, respectively, in these three cases.  Since we
 *  only have one such proc in amproc, it's number 1.
 */

/* amproc number of the three-way (< 0, 0, > 0) key comparison procedure */
#define BTORDER_PROC	1

/*
 * prototypes for functions in nbtinsert.c
 */
211
extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem,
212
				      bool index_is_unique, Relation heapRel);
213 214

				/* default is to allow duplicates */
215 216 217 218 219 220 221 222 223 224 225 226 227
extern bool _bt_itemcmp(Relation rel, Size keysz, BTItem item1, BTItem item2,
			StrategyNumber strat);

/*
 * prototypes for functions in nbtpage.c
 */
extern void _bt_metapinit(Relation rel);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
extern void _bt_relbuf(Relation rel, Buffer buf, int access);
extern void _bt_wrtbuf(Relation rel, Buffer buf);
extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
228
extern void _bt_metaproot(Relation rel, BlockNumber rootbknum, int level);
229 230 231 232 233 234 235 236 237 238 239 240
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
extern void _bt_pagedel(Relation rel, ItemPointer tid);

/*
 * prototypes for functions in nbtree.c
 */
extern bool BuildingBtree;	/* in nbtree.c */

extern void btbuild(Relation heap, Relation index, int natts,
	AttrNumber *attnum, IndexStrategy istrat, uint16 pcount,
	Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo);
extern InsertIndexResult btinsert(Relation rel, Datum *datum, char *nulls,
241
				  ItemPointer ht_ctid, Relation heapRel);
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
extern char *btgettuple(IndexScanDesc scan, ScanDirection dir);
extern char *btbeginscan(Relation rel, bool fromEnd, uint16 keysz,
			 ScanKey scankey);

extern void btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey);
extern void btmovescan(IndexScanDesc scan, Datum v);
extern void btendscan(IndexScanDesc scan);
extern void btmarkpos(IndexScanDesc scan);
extern void btrestrpos(IndexScanDesc scan);
extern void btdelete(Relation rel, ItemPointer tid);

/*
 * prototypes for functions in nbtscan.c
 */
extern void _bt_regscan(IndexScanDesc scan);
extern void _bt_dropscan(IndexScanDesc scan);
258
extern void _bt_adjscans(Relation rel, ItemPointer tid, int op);
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288

/*
 * prototypes for functions in nbtsearch.c
 */
/* Declarations only; the definitions live in nbtsearch.c. */
extern BTStack _bt_search(Relation rel,
			  int keysz,
			  ScanKey scankey,
			  Buffer *bufP);
extern Buffer _bt_moveright(Relation rel,
			    Buffer buf,
			    int keysz,
			    ScanKey scankey,
			    int access);
extern bool _bt_skeycmp(Relation rel,
			Size keysz,
			ScanKey scankey,
			Page page,
			ItemId itemid,
			StrategyNumber strat);
extern OffsetNumber _bt_binsrch(Relation rel,
				Buffer buf,
				int keysz,
				ScanKey scankey,
				int srchtype);
extern RetrieveIndexResult _bt_next(IndexScanDesc scan, ScanDirection dir);
extern RetrieveIndexResult _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);

/*
 * prototypes for functions in nbtstrat.c
 */
/* Declarations only; the definitions live in nbtstrat.c. */
extern StrategyNumber _bt_getstrat(Relation rel,
				   AttrNumber attno,
				   RegProcedure proc);
extern bool _bt_invokestrat(Relation rel,
			    AttrNumber attno,
			    StrategyNumber strat,
			    Datum left,
			    Datum right);

/*
 * prototypes for functions in nbtutils.c
 */
extern ScanKey  _bt_mkscankey(Relation rel, IndexTuple itup);
extern void _bt_freeskey(ScanKey skey);
extern void _bt_freestack(BTStack stack);
289
extern void _bt_orderkeys(Relation relation, BTScanOpaque so);
290
extern bool _bt_checkkeys (IndexScanDesc scan, IndexTuple tuple, Size *keysok);
291 292 293 294 295
extern BTItem _bt_formitem(IndexTuple itup);

/*
 * prototypes for functions in nbtsort.c
 */
296
extern void *_bt_spoolinit(Relation index, int ntapes, bool isunique);
297 298 299 300 301
extern void _bt_spooldestroy(void *spool);
extern void _bt_spool(Relation index, BTItem btitem, void *spool);
extern void _bt_leafbuild(Relation index, void *spool);

#endif	/* NBTREE_H */