提交 eda6dd32 编写于 作者: N Neil Conway

GiST improvements:

- make sure we always invoke user-supplied GiST methods in a short-lived
  memory context. This means the backend isn't exposed to any memory leaks
  that be in those methods (in fact, it is probably a net loss for most
  GiST methods to bother manually freeing memory now). This also means
  we can do away with a lot of ugly manual memory management in the
  GiST code itself.

- keep the current page of a GiST index scan pinned, rather than doing a
  ReadBuffer() for each tuple produced by the scan. Since ReadBuffer() is
  expensive, this is a perf. win

- implement dead tuple killing for GiST indexes (which is easy to do, now
  that we keep a pin on the current scan page). Now all the builtin indexes
  implement dead tuple killing.

- cleanup a lot of ugly code in GiST
上级 818bfda1
<!--
$PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.17 2005/04/09 03:52:43 momjian Exp $
$PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.18 2005/05/17 00:59:30 neilc Exp $
-->
<chapter id="GiST">
......@@ -202,7 +202,7 @@ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.17 2005/04/09 03:52:43 momjian Exp
<para>
The lack of write-ahead logging is just a small matter of programming,
but since it isn't done yet, a crash could render a <acronym>GiST</acronym>
index inconsistent, forcing a REINDEX.
index inconsistent, forcing a <command>REINDEX</command>.
</para>
</sect1>
......
此差异已折叠。
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.45 2005/03/27 23:52:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.46 2005/05/17 00:59:30 neilc Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -16,41 +16,71 @@
#include "access/gist.h"
#include "executor/execdebug.h"
#include "utils/memutils.h"
static OffsetNumber gistfindnext(IndexScanDesc s, Page p, OffsetNumber n,
ScanDirection dir);
static bool gistscancache(IndexScanDesc s, ScanDirection dir);
static bool gistfirst(IndexScanDesc s, ScanDirection dir);
static bool gistnext(IndexScanDesc s, ScanDirection dir);
static bool gistindex_keytest(IndexTuple tuple,
int scanKeySize, ScanKey key, GISTSTATE *giststate,
Relation r, Page p, OffsetNumber offset);
static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n,
ScanDirection dir);
static bool gistnext(IndexScanDesc scan, ScanDirection dir);
static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan,
OffsetNumber offset);
/*
* gistgettuple() -- Get the next tuple in the scan
*/
Datum
gistgettuple(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
bool res;
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
Page page;
OffsetNumber offnum;
GISTScanOpaque so;
/* if we have it cached in the scan desc, just return the value */
if (gistscancache(s, dir))
PG_RETURN_BOOL(true);
so = (GISTScanOpaque) scan->opaque;
/* not cached, so we'll have to do some work */
if (ItemPointerIsValid(&(s->currentItemData)))
res = gistnext(s, dir);
else
res = gistfirst(s, dir);
PG_RETURN_BOOL(res);
/*
* If we have produced an index tuple in the past and the executor
* has informed us we need to mark it as "killed", do so now.
*
* XXX: right now there is no concurrent access. In the
* future, we should (a) get a read lock on the page (b) check
* that the location of the previously-fetched tuple hasn't
* changed due to concurrent insertions.
*/
if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData)))
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->curbuf);
PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;
SetBufferCommitInfoNeedsSave(so->curbuf);
}
/*
* Get the next tuple that matches the search key. If asked to
* skip killed tuples, continue looping until we find a non-killed
* tuple that matches the search key.
*/
for (;;)
{
bool res = gistnext(scan, dir);
if (res == true && scan->ignore_killed_tuples)
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->curbuf);
if (ItemIdDeleted(PageGetItemId(page, offnum)))
continue;
}
PG_RETURN_BOOL(res);
}
}
Datum
gistgetmulti(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
......@@ -60,13 +90,10 @@ gistgetmulti(PG_FUNCTION_ARGS)
/* XXX generic implementation: loop around guts of gistgettuple */
while (ntids < max_tids)
{
if (ItemPointerIsValid(&(s->currentItemData)))
res = gistnext(s, ForwardScanDirection);
else
res = gistfirst(s, ForwardScanDirection);
res = gistnext(scan, ForwardScanDirection);
if (!res)
break;
tids[ntids] = s->xs_ctup.t_self;
tids[ntids] = scan->xs_ctup.t_self;
ntids++;
}
......@@ -74,166 +101,123 @@ gistgetmulti(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res);
}
/*
* Fetch a tuple that matchs the search key; this can be invoked
* either to fetch the first such tuple or subsequent matching
* tuples. Returns true iff a matching tuple was found.
*/
static bool
gistfirst(IndexScanDesc s, ScanDirection dir)
gistnext(IndexScanDesc scan, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
OffsetNumber maxoff;
GISTPageOpaque po;
GISTScanOpaque so;
GISTSTACK *stk;
BlockNumber blk;
IndexTuple it;
so = (GISTScanOpaque) s->opaque;
so = (GISTScanOpaque) scan->opaque;
if (ItemPointerIsValid(&scan->currentItemData) == false)
{
/* Being asked to fetch the first entry, so start at the root */
Assert(so->curbuf == InvalidBuffer);
so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);
}
b = ReadBuffer(s->indexRelation, GISTP_ROOT);
p = BufferGetPage(b);
p = BufferGetPage(so->curbuf);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
for (;;)
if (ItemPointerIsValid(&scan->currentItemData) == false)
{
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir))
n = gistfindnext(s, p, maxoff, dir);
n = PageGetMaxOffsetNumber(p);
else
n = gistfindnext(s, p, FirstOffsetNumber, dir);
while (n < FirstOffsetNumber || n > maxoff)
{
stk = so->s_stack;
if (stk == NULL)
{
ReleaseBuffer(b);
return false;
}
b = ReleaseAndReadBuffer(b, s->indexRelation, stk->gs_blk);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(stk->gs_child);
else
n = OffsetNumberNext(stk->gs_child);
so->s_stack = stk->gs_parent;
pfree(stk);
n = gistfindnext(s, p, n, dir);
}
if (po->flags & F_LEAF)
{
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
return true;
}
else
{
stk = (GISTSTACK *) palloc(sizeof(GISTSTACK));
stk->gs_child = n;
stk->gs_blk = BufferGetBlockNumber(b);
stk->gs_parent = so->s_stack;
so->s_stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
blk = ItemPointerGetBlockNumber(&(it->t_tid));
b = ReleaseAndReadBuffer(b, s->indexRelation, blk);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
}
n = FirstOffsetNumber;
}
}
static bool
gistnext(IndexScanDesc s, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
OffsetNumber maxoff;
GISTPageOpaque po;
GISTScanOpaque so;
GISTSTACK *stk;
BlockNumber blk;
IndexTuple it;
so = (GISTScanOpaque) s->opaque;
blk = ItemPointerGetBlockNumber(&(s->currentItemData));
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
if (ScanDirectionIsForward(dir))
n = OffsetNumberNext(n);
else
n = OffsetNumberPrev(n);
{
n = ItemPointerGetOffsetNumber(&(scan->currentItemData));
b = ReadBuffer(s->indexRelation, blk);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(n);
else
n = OffsetNumberNext(n);
}
for (;;)
{
maxoff = PageGetMaxOffsetNumber(p);
n = gistfindnext(s, p, n, dir);
n = gistfindnext(scan, n, dir);
while (n < FirstOffsetNumber || n > maxoff)
if (!OffsetNumberIsValid(n))
{
stk = so->s_stack;
if (stk == NULL)
/*
* We ran out of matching index entries on the current
* page, so pop the top stack entry and use it to continue
* the search.
*/
/* If we're out of stack entries, we're done */
if (so->stack == NULL)
{
ReleaseBuffer(b);
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
return false;
}
b = ReleaseAndReadBuffer(b, s->indexRelation, stk->gs_blk);
p = BufferGetPage(b);
stk = so->stack;
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block);
p = BufferGetPage(so->curbuf);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(stk->gs_child);
n = OffsetNumberPrev(stk->offset);
else
n = OffsetNumberNext(stk->gs_child);
n = OffsetNumberNext(stk->offset);
so->s_stack = stk->gs_parent;
so->stack = stk->parent;
pfree(stk);
n = gistfindnext(s, p, n, dir);
continue;
}
if (po->flags & F_LEAF)
{
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
/*
* We've found a matching index entry in a leaf page, so
* return success. Note that we keep "curbuf" pinned so
* that we can efficiently resume the index scan later.
*/
ItemPointerSet(&(scan->currentItemData),
BufferGetBlockNumber(so->curbuf), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
scan->xs_ctup.t_self = it->t_tid;
return true;
}
else
{
/*
* We've found an entry in an internal node whose key is
* consistent with the search key, so continue the search
* in the pointed-to child node (i.e. we search depth
* first). Push the current node onto the stack so we
* resume searching from this node later.
*/
BlockNumber child_block;
stk = (GISTSTACK *) palloc(sizeof(GISTSTACK));
stk->gs_child = n;
stk->gs_blk = BufferGetBlockNumber(b);
stk->gs_parent = so->s_stack;
so->s_stack = stk;
stk->offset = n;
stk->block = BufferGetBlockNumber(so->curbuf);
stk->parent = so->stack;
so->stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
blk = ItemPointerGetBlockNumber(&(it->t_tid));
child_block = ItemPointerGetBlockNumber(&(it->t_tid));
b = ReleaseAndReadBuffer(b, s->indexRelation, blk);
p = BufferGetPage(b);
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
child_block);
p = BufferGetPage(so->curbuf);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
if (ScanDirectionIsBackward(dir))
......@@ -244,19 +228,34 @@ gistnext(IndexScanDesc s, ScanDirection dir)
}
}
/* Similar to index_keytest, but decompresses the key in the IndexTuple */
/*
* Similar to index_keytest, but first decompress the key in the
* IndexTuple before passing it to the sk_func (and we have previously
* overwritten the sk_func to use the user-defined Consistent method,
* so we actually invoke that). Note that this function is always
* invoked in a short-lived memory context, so we don't need to worry
* about cleaning up allocated memory (either here or in the
* implementation of any Consistent methods).
*/
static bool
gistindex_keytest(IndexTuple tuple,
int scanKeySize,
ScanKey key,
GISTSTATE *giststate,
Relation r,
Page p,
IndexScanDesc scan,
OffsetNumber offset)
{
int keySize = scan->numberOfKeys;
ScanKey key = scan->keyData;
Relation r = scan->indexRelation;
GISTScanOpaque so;
Page p;
GISTSTATE *giststate;
so = (GISTScanOpaque) scan->opaque;
giststate = so->giststate;
p = BufferGetPage(so->curbuf);
IncrIndexProcessed();
while (scanKeySize > 0)
while (keySize > 0)
{
Datum datum;
bool isNull;
......@@ -297,53 +296,57 @@ gistindex_keytest(IndexTuple tuple,
Int32GetDatum(key->sk_strategy),
ObjectIdGetDatum(key->sk_subtype));
/* if index datum had to be decompressed, free it */
if (de.key != datum && !isAttByVal(giststate, key->sk_attno - 1))
if (DatumGetPointer(de.key) != NULL)
pfree(DatumGetPointer(de.key));
if (!DatumGetBool(test))
return false;
scanKeySize--;
keySize--;
key++;
}
return true;
}
/*
* Return the offset of the first index entry that is consistent with
* the search key after offset 'n' in the current page. If there are
* no more consistent entries, return InvalidOffsetNumber.
*/
static OffsetNumber
gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
{
OffsetNumber maxoff;
IndexTuple it;
GISTPageOpaque po;
GISTScanOpaque so;
GISTSTATE *giststate;
OffsetNumber maxoff;
IndexTuple it;
GISTPageOpaque po;
GISTScanOpaque so;
MemoryContext oldcxt;
Page p;
so = (GISTScanOpaque) scan->opaque;
p = BufferGetPage(so->curbuf);
maxoff = PageGetMaxOffsetNumber(p);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
so = (GISTScanOpaque) s->opaque;
giststate = so->giststate;
/*
* Make sure we're in a short-lived memory context when we invoke
* a user-supplied GiST method in gistindex_keytest(), so we don't
* leak memory
*/
oldcxt = MemoryContextSwitchTo(so->tempCxt);
/*
* If we modified the index during the scan, we may have a pointer to
* a ghost tuple, before the scan. If this is the case, back up one.
*/
if (so->s_flags & GS_CURBEFORE)
if (so->flags & GS_CURBEFORE)
{
so->s_flags &= ~GS_CURBEFORE;
so->flags &= ~GS_CURBEFORE;
n = OffsetNumberPrev(n);
}
while (n >= FirstOffsetNumber && n <= maxoff)
{
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
if (gistindex_keytest(it,
s->numberOfKeys, s->keyData, giststate,
s->indexRelation, p, n))
if (gistindex_keytest(it, scan, n))
break;
if (ScanDirectionIsBackward(dir))
......@@ -352,28 +355,16 @@ gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
n = OffsetNumberNext(n);
}
return n;
}
static bool
gistscancache(IndexScanDesc s, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
IndexTuple it;
MemoryContextSwitchTo(oldcxt);
MemoryContextReset(so->tempCxt);
if (!(ScanDirectionIsNoMovement(dir)
&& ItemPointerIsValid(&(s->currentItemData))))
return false;
b = ReadBuffer(s->indexRelation,
ItemPointerGetBlockNumber(&(s->currentItemData)));
p = BufferGetPage(b);
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
return true;
/*
* If we found a matching entry, return its offset; otherwise
* return InvalidOffsetNumber to inform the caller to go to the
* next page.
*/
if (n >= FirstOffsetNumber && n <= maxoff)
return n;
else
return InvalidOffsetNumber;
}
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.56 2004/12/31 21:59:10 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.57 2005/05/17 00:59:30 neilc Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -17,28 +17,29 @@
#include "access/genam.h"
#include "access/gist.h"
#include "access/gistscan.h"
#include "utils/memutils.h"
#include "utils/resowner.h"
/* routines defined and used here */
static void gistregscan(IndexScanDesc s);
static void gistdropscan(IndexScanDesc s);
static void gistadjone(IndexScanDesc s, int op, BlockNumber blkno,
static void gistregscan(IndexScanDesc scan);
static void gistdropscan(IndexScanDesc scan);
static void gistadjone(IndexScanDesc scan, int op, BlockNumber blkno,
OffsetNumber offnum);
static void adjuststack(GISTSTACK *stk, BlockNumber blkno);
static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
static void adjustiptr(IndexScanDesc scan, ItemPointer iptr,
int op, BlockNumber blkno, OffsetNumber offnum);
static void gistfreestack(GISTSTACK *s);
/*
* Whenever we start a GiST scan in a backend, we register it in private
* space. Then if the GiST index gets updated, we check all registered
* scans and adjust them if the tuple they point at got moved by the
* update. We only need to do this in private space, because when we update
* an GiST we have a write lock on the tree, so no other process can have
* any locks at all on it. A single transaction can have write and read
* locks on the same object, so that's why we need to handle this case.
* Whenever we start a GiST scan in a backend, we register it in
* private space. Then if the GiST index gets updated, we check all
* registered scans and adjust them if the tuple they point at got
* moved by the update. We only need to do this in private space,
* because when we update an GiST we have a write lock on the tree, so
* no other process can have any locks at all on it. A single
* transaction can have write and read locks on the same object, so
* that's why we need to handle this case.
*/
typedef struct GISTScanListData
{
IndexScanDesc gsl_scan;
......@@ -57,65 +58,77 @@ gistbeginscan(PG_FUNCTION_ARGS)
Relation r = (Relation) PG_GETARG_POINTER(0);
int nkeys = PG_GETARG_INT32(1);
ScanKey key = (ScanKey) PG_GETARG_POINTER(2);
IndexScanDesc s;
IndexScanDesc scan;
s = RelationGetIndexScan(r, nkeys, key);
scan = RelationGetIndexScan(r, nkeys, key);
gistregscan(scan);
gistregscan(s);
PG_RETURN_POINTER(s);
PG_RETURN_POINTER(scan);
}
Datum
gistrescan(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanKey key = (ScanKey) PG_GETARG_POINTER(1);
GISTScanOpaque p;
GISTScanOpaque so;
int i;
/*
* Clear all the pointers.
*/
ItemPointerSetInvalid(&s->currentItemData);
ItemPointerSetInvalid(&s->currentMarkData);
ItemPointerSetInvalid(&scan->currentItemData);
ItemPointerSetInvalid(&scan->currentMarkData);
p = (GISTScanOpaque) s->opaque;
if (p != NULL)
so = (GISTScanOpaque) scan->opaque;
if (so != NULL)
{
/* rescan an existing indexscan --- reset state */
gistfreestack(p->s_stack);
gistfreestack(p->s_markstk);
p->s_stack = p->s_markstk = NULL;
p->s_flags = 0x0;
gistfreestack(so->stack);
gistfreestack(so->markstk);
so->stack = so->markstk = NULL;
so->flags = 0x0;
/* drop pins on buffers -- no locks held */
if (BufferIsValid(so->curbuf))
{
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
}
if (BufferIsValid(so->markbuf))
{
ReleaseBuffer(so->markbuf);
so->markbuf = InvalidBuffer;
}
}
else
{
/* initialize opaque data */
p = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData));
p->s_stack = p->s_markstk = NULL;
p->s_flags = 0x0;
s->opaque = p;
p->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
initGISTstate(p->giststate, s->indexRelation);
so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData));
so->stack = so->markstk = NULL;
so->flags = 0x0;
so->tempCxt = createTempGistContext();
so->curbuf = so->markbuf = InvalidBuffer;
so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
initGISTstate(so->giststate, scan->indexRelation);
scan->opaque = so;
}
/* Update scan key, if a new one is given */
if (key && s->numberOfKeys > 0)
if (key && scan->numberOfKeys > 0)
{
memmove(s->keyData,
key,
s->numberOfKeys * sizeof(ScanKeyData));
memmove(scan->keyData, key,
scan->numberOfKeys * sizeof(ScanKeyData));
/*
* Modify the scan key so that the Consistent function is called
* for all comparisons. The original operator is passed to the
* Consistent function in the form of its strategy number, which
* is available from the sk_strategy field, and its subtype from
* the sk_subtype field.
* Modify the scan key so that all the Consistent method is
* called for all comparisons. The original operator is passed
* to the Consistent function in the form of its strategy
* number, which is available from the sk_strategy field, and
* its subtype from the sk_subtype field.
*/
for (i = 0; i < s->numberOfKeys; i++)
s->keyData[i].sk_func = p->giststate->consistentFn[s->keyData[i].sk_attno - 1];
for (i = 0; i < scan->numberOfKeys; i++)
scan->keyData[i].sk_func = so->giststate->consistentFn[scan->keyData[i].sk_attno - 1];
}
PG_RETURN_VOID();
......@@ -124,35 +137,47 @@ gistrescan(PG_FUNCTION_ARGS)
Datum
gistmarkpos(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque p;
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque so;
GISTSTACK *o,
*n,
*tmp;
s->currentMarkData = s->currentItemData;
p = (GISTScanOpaque) s->opaque;
if (p->s_flags & GS_CURBEFORE)
p->s_flags |= GS_MRKBEFORE;
scan->currentMarkData = scan->currentItemData;
so = (GISTScanOpaque) scan->opaque;
if (so->flags & GS_CURBEFORE)
so->flags |= GS_MRKBEFORE;
else
p->s_flags &= ~GS_MRKBEFORE;
so->flags &= ~GS_MRKBEFORE;
o = NULL;
n = p->s_stack;
n = so->stack;
/* copy the parent stack from the current item data */
while (n != NULL)
{
tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK));
tmp->gs_child = n->gs_child;
tmp->gs_blk = n->gs_blk;
tmp->gs_parent = o;
tmp->offset = n->offset;
tmp->block = n->block;
tmp->parent = o;
o = tmp;
n = n->gs_parent;
n = n->parent;
}
gistfreestack(p->s_markstk);
p->s_markstk = o;
gistfreestack(so->markstk);
so->markstk = o;
/* Update markbuf: make sure to bump ref count on curbuf */
if (BufferIsValid(so->markbuf))
{
ReleaseBuffer(so->markbuf);
so->markbuf = InvalidBuffer;
}
if (BufferIsValid(so->curbuf))
{
IncrBufferRefCount(so->curbuf);
so->markbuf = so->curbuf;
}
PG_RETURN_VOID();
}
......@@ -160,35 +185,47 @@ gistmarkpos(PG_FUNCTION_ARGS)
Datum
gistrestrpos(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque p;
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque so;
GISTSTACK *o,
*n,
*tmp;
s->currentItemData = s->currentMarkData;
p = (GISTScanOpaque) s->opaque;
if (p->s_flags & GS_MRKBEFORE)
p->s_flags |= GS_CURBEFORE;
scan->currentItemData = scan->currentMarkData;
so = (GISTScanOpaque) scan->opaque;
if (so->flags & GS_MRKBEFORE)
so->flags |= GS_CURBEFORE;
else
p->s_flags &= ~GS_CURBEFORE;
so->flags &= ~GS_CURBEFORE;
o = NULL;
n = p->s_markstk;
n = so->markstk;
/* copy the parent stack from the current item data */
while (n != NULL)
{
tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK));
tmp->gs_child = n->gs_child;
tmp->gs_blk = n->gs_blk;
tmp->gs_parent = o;
tmp->offset = n->offset;
tmp->block = n->block;
tmp->parent = o;
o = tmp;
n = n->gs_parent;
n = n->parent;
}
gistfreestack(p->s_stack);
p->s_stack = o;
gistfreestack(so->stack);
so->stack = o;
/* Update curbuf: be sure to bump ref count on markbuf */
if (BufferIsValid(so->curbuf))
{
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
}
if (BufferIsValid(so->markbuf))
{
IncrBufferRefCount(so->markbuf);
so->curbuf = so->markbuf;
}
PG_RETURN_VOID();
}
......@@ -196,52 +233,57 @@ gistrestrpos(PG_FUNCTION_ARGS)
Datum
gistendscan(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque p;
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque so;
p = (GISTScanOpaque) s->opaque;
so = (GISTScanOpaque) scan->opaque;
if (p != NULL)
if (so != NULL)
{
gistfreestack(p->s_stack);
gistfreestack(p->s_markstk);
if (p->giststate != NULL)
freeGISTstate(p->giststate);
pfree(s->opaque);
gistfreestack(so->stack);
gistfreestack(so->markstk);
if (so->giststate != NULL)
freeGISTstate(so->giststate);
/* drop pins on buffers -- we aren't holding any locks */
if (BufferIsValid(so->curbuf))
ReleaseBuffer(so->curbuf);
if (BufferIsValid(so->markbuf))
ReleaseBuffer(so->markbuf);
MemoryContextDelete(so->tempCxt);
pfree(scan->opaque);
}
gistdropscan(s);
/* XXX don't unset read lock -- two-phase locking */
gistdropscan(scan);
PG_RETURN_VOID();
}
static void
gistregscan(IndexScanDesc s)
gistregscan(IndexScanDesc scan)
{
GISTScanList l;
l = (GISTScanList) palloc(sizeof(GISTScanListData));
l->gsl_scan = s;
l->gsl_scan = scan;
l->gsl_owner = CurrentResourceOwner;
l->gsl_next = GISTScans;
GISTScans = l;
}
static void
gistdropscan(IndexScanDesc s)
gistdropscan(IndexScanDesc scan)
{
GISTScanList l;
GISTScanList prev;
prev = NULL;
for (l = GISTScans; l != NULL && l->gsl_scan != s; l = l->gsl_next)
for (l = GISTScans; l != NULL && l->gsl_scan != scan; l = l->gsl_next)
prev = l;
if (l == NULL)
elog(ERROR, "GiST scan list corrupted -- could not find 0x%p",
(void *) s);
(void *) scan);
if (prev == NULL)
GISTScans = l->gsl_next;
......@@ -313,22 +355,22 @@ gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum)
* update. If so, we make the change here.
*/
static void
gistadjone(IndexScanDesc s,
gistadjone(IndexScanDesc scan,
int op,
BlockNumber blkno,
OffsetNumber offnum)
{
GISTScanOpaque so;
adjustiptr(s, &(s->currentItemData), op, blkno, offnum);
adjustiptr(s, &(s->currentMarkData), op, blkno, offnum);
adjustiptr(scan, &(scan->currentItemData), op, blkno, offnum);
adjustiptr(scan, &(scan->currentMarkData), op, blkno, offnum);
so = (GISTScanOpaque) s->opaque;
so = (GISTScanOpaque) scan->opaque;
if (op == GISTOP_SPLIT)
{
adjuststack(so->s_stack, blkno);
adjuststack(so->s_markstk, blkno);
adjuststack(so->stack, blkno);
adjuststack(so->markstk, blkno);
}
}
......@@ -340,7 +382,7 @@ gistadjone(IndexScanDesc s,
* the same page.
*/
static void
adjustiptr(IndexScanDesc s,
adjustiptr(IndexScanDesc scan,
ItemPointer iptr,
int op,
BlockNumber blkno,
......@@ -354,7 +396,7 @@ adjustiptr(IndexScanDesc s,
if (ItemPointerGetBlockNumber(iptr) == blkno)
{
curoff = ItemPointerGetOffsetNumber(iptr);
so = (GISTScanOpaque) s->opaque;
so = (GISTScanOpaque) scan->opaque;
switch (op)
{
......@@ -362,7 +404,6 @@ adjustiptr(IndexScanDesc s,
/* back up one if we need to */
if (curoff >= offnum)
{
if (curoff > FirstOffsetNumber)
{
/* just adjust the item pointer */
......@@ -375,10 +416,10 @@ adjustiptr(IndexScanDesc s,
* tuple
*/
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
if (iptr == &(s->currentItemData))
so->s_flags |= GS_CURBEFORE;
if (iptr == &(scan->currentItemData))
so->flags |= GS_CURBEFORE;
else
so->s_flags |= GS_MRKBEFORE;
so->flags |= GS_MRKBEFORE;
}
}
break;
......@@ -386,10 +427,10 @@ adjustiptr(IndexScanDesc s,
case GISTOP_SPLIT:
/* back to start of page on split */
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
if (iptr == &(s->currentItemData))
so->s_flags &= ~GS_CURBEFORE;
if (iptr == &(scan->currentItemData))
so->flags &= ~GS_CURBEFORE;
else
so->s_flags &= ~GS_MRKBEFORE;
so->flags &= ~GS_MRKBEFORE;
break;
default:
......@@ -417,9 +458,20 @@ adjuststack(GISTSTACK *stk, BlockNumber blkno)
{
while (stk != NULL)
{
if (stk->gs_blk == blkno)
stk->gs_child = FirstOffsetNumber;
if (stk->block == blkno)
stk->offset = FirstOffsetNumber;
stk = stk->parent;
}
}
stk = stk->gs_parent;
static void
gistfreestack(GISTSTACK *s)
{
while (s != NULL)
{
GISTSTACK *p = s->parent;
pfree(s);
s = p;
}
}
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/gist.h,v 1.44 2005/03/27 23:53:04 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/gist.h,v 1.45 2005/05/17 00:59:30 neilc Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -54,13 +54,21 @@ typedef GISTPageOpaqueData *GISTPageOpaque;
#define GIST_LEAF(entry) (((GISTPageOpaque) PageGetSpecialPointer((entry)->page))->flags & F_LEAF)
/*
* When we descend a tree, we keep a stack of parent pointers.
* When we descend a tree, we keep a stack of parent pointers. This
* allows us to follow a chain of internal node points until we reach
* a leaf node, and then back up the stack to re-examine the internal
* nodes.
*
* 'parent' is the previous stack entry -- i.e. the node we arrived
* from. 'block' is the node's block number. 'offset' is the offset in
* the node's page that we stopped at (i.e. we followed the child
* pointer located at the specified offset).
*/
typedef struct GISTSTACK
{
struct GISTSTACK *gs_parent;
OffsetNumber gs_child;
BlockNumber gs_blk;
struct GISTSTACK *parent;
OffsetNumber offset;
BlockNumber block;
} GISTSTACK;
typedef struct GISTSTATE
......@@ -84,10 +92,13 @@ typedef struct GISTSTATE
*/
typedef struct GISTScanOpaqueData
{
struct GISTSTACK *s_stack;
struct GISTSTACK *s_markstk;
uint16 s_flags;
struct GISTSTATE *giststate;
GISTSTACK *stack;
GISTSTACK *markstk;
uint16 flags;
GISTSTATE *giststate;
MemoryContext tempCxt;
Buffer curbuf;
Buffer markbuf;
} GISTScanOpaqueData;
typedef GISTScanOpaqueData *GISTScanOpaque;
......@@ -101,8 +112,8 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
#define GS_CURBEFORE ((uint16) (1 << 0))
#define GS_MRKBEFORE ((uint16) (1 << 1))
/* root page of a gist */
#define GISTP_ROOT 0
/* root page of a gist index */
#define GIST_ROOT_BLKNO 0
/*
* When we update a relation on which we're doing a scan, we need to
......@@ -183,7 +194,6 @@ extern Datum gistbuild(PG_FUNCTION_ARGS);
extern Datum gistinsert(PG_FUNCTION_ARGS);
extern Datum gistbulkdelete(PG_FUNCTION_ARGS);
extern void _gistdump(Relation r);
extern void gistfreestack(GISTSTACK *s);
extern void initGISTstate(GISTSTATE *giststate, Relation index);
extern void freeGISTstate(GISTSTATE *giststate);
extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
......@@ -193,6 +203,7 @@ extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
extern void gist_undo(XLogRecPtr lsn, XLogRecord *record);
extern void gist_desc(char *buf, uint8 xl_info, char *rec);
extern MemoryContext createTempGistContext(void);
/* gistget.c */
extern Datum gistgettuple(PG_FUNCTION_ARGS);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册