提交 eda6dd32 编写于 作者: N Neil Conway

GiST improvements:

- make sure we always invoke user-supplied GiST methods in a short-lived
  memory context. This means the backend isn't exposed to any memory leaks
  that may be in those methods (in fact, it is probably a net loss for most
  GiST methods to bother manually freeing memory now). This also means
  we can do away with a lot of ugly manual memory management in the
  GiST code itself.

- keep the current page of a GiST index scan pinned, rather than doing a
  ReadBuffer() for each tuple produced by the scan. Since ReadBuffer() is
  expensive, this is a performance win.

- implement dead tuple killing for GiST indexes (which is easy to do, now
  that we keep a pin on the current scan page). Now all the builtin indexes
  implement dead tuple killing.

- clean up a lot of ugly code in GiST
上级 818bfda1
<!-- <!--
$PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.17 2005/04/09 03:52:43 momjian Exp $ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.18 2005/05/17 00:59:30 neilc Exp $
--> -->
<chapter id="GiST"> <chapter id="GiST">
...@@ -202,7 +202,7 @@ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.17 2005/04/09 03:52:43 momjian Exp ...@@ -202,7 +202,7 @@ $PostgreSQL: pgsql/doc/src/sgml/gist.sgml,v 1.17 2005/04/09 03:52:43 momjian Exp
<para> <para>
The lack of write-ahead logging is just a small matter of programming, The lack of write-ahead logging is just a small matter of programming,
but since it isn't done yet, a crash could render a <acronym>GiST</acronym> but since it isn't done yet, a crash could render a <acronym>GiST</acronym>
index inconsistent, forcing a REINDEX. index inconsistent, forcing a <command>REINDEX</command>.
</para> </para>
</sect1> </sect1>
......
此差异已折叠。
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.45 2005/03/27 23:52:55 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.46 2005/05/17 00:59:30 neilc Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -16,41 +16,71 @@ ...@@ -16,41 +16,71 @@
#include "access/gist.h" #include "access/gist.h"
#include "executor/execdebug.h" #include "executor/execdebug.h"
#include "utils/memutils.h"
static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n,
static OffsetNumber gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir);
ScanDirection dir); static bool gistnext(IndexScanDesc scan, ScanDirection dir);
static bool gistscancache(IndexScanDesc s, ScanDirection dir); static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan,
static bool gistfirst(IndexScanDesc s, ScanDirection dir); OffsetNumber offset);
static bool gistnext(IndexScanDesc s, ScanDirection dir);
static bool gistindex_keytest(IndexTuple tuple,
int scanKeySize, ScanKey key, GISTSTATE *giststate,
Relation r, Page p, OffsetNumber offset);
/*
* gistgettuple() -- Get the next tuple in the scan
*/
Datum Datum
gistgettuple(PG_FUNCTION_ARGS) gistgettuple(PG_FUNCTION_ARGS)
{ {
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
bool res; Page page;
OffsetNumber offnum;
GISTScanOpaque so;
/* if we have it cached in the scan desc, just return the value */ so = (GISTScanOpaque) scan->opaque;
if (gistscancache(s, dir))
PG_RETURN_BOOL(true);
/* not cached, so we'll have to do some work */ /*
if (ItemPointerIsValid(&(s->currentItemData))) * If we have produced an index tuple in the past and the executor
res = gistnext(s, dir); * has informed us we need to mark it as "killed", do so now.
else *
res = gistfirst(s, dir); * XXX: right now there is no concurrent access. In the
PG_RETURN_BOOL(res); * future, we should (a) get a read lock on the page (b) check
* that the location of the previously-fetched tuple hasn't
* changed due to concurrent insertions.
*/
if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData)))
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->curbuf);
PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;
SetBufferCommitInfoNeedsSave(so->curbuf);
}
/*
* Get the next tuple that matches the search key. If asked to
* skip killed tuples, continue looping until we find a non-killed
* tuple that matches the search key.
*/
for (;;)
{
bool res = gistnext(scan, dir);
if (res == true && scan->ignore_killed_tuples)
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->curbuf);
if (ItemIdDeleted(PageGetItemId(page, offnum)))
continue;
}
PG_RETURN_BOOL(res);
}
} }
Datum Datum
gistgetmulti(PG_FUNCTION_ARGS) gistgetmulti(PG_FUNCTION_ARGS)
{ {
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2); int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
...@@ -60,13 +90,10 @@ gistgetmulti(PG_FUNCTION_ARGS) ...@@ -60,13 +90,10 @@ gistgetmulti(PG_FUNCTION_ARGS)
/* XXX generic implementation: loop around guts of gistgettuple */ /* XXX generic implementation: loop around guts of gistgettuple */
while (ntids < max_tids) while (ntids < max_tids)
{ {
if (ItemPointerIsValid(&(s->currentItemData))) res = gistnext(scan, ForwardScanDirection);
res = gistnext(s, ForwardScanDirection);
else
res = gistfirst(s, ForwardScanDirection);
if (!res) if (!res)
break; break;
tids[ntids] = s->xs_ctup.t_self; tids[ntids] = scan->xs_ctup.t_self;
ntids++; ntids++;
} }
...@@ -74,166 +101,123 @@ gistgetmulti(PG_FUNCTION_ARGS) ...@@ -74,166 +101,123 @@ gistgetmulti(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res); PG_RETURN_BOOL(res);
} }
/*
* Fetch a tuple that matches the search key; this can be invoked
* either to fetch the first such tuple or subsequent matching
* tuples. Returns true iff a matching tuple was found.
*/
static bool static bool
gistfirst(IndexScanDesc s, ScanDirection dir) gistnext(IndexScanDesc scan, ScanDirection dir)
{ {
Buffer b;
Page p; Page p;
OffsetNumber n; OffsetNumber n;
OffsetNumber maxoff;
GISTPageOpaque po; GISTPageOpaque po;
GISTScanOpaque so; GISTScanOpaque so;
GISTSTACK *stk; GISTSTACK *stk;
BlockNumber blk;
IndexTuple it; IndexTuple it;
so = (GISTScanOpaque) s->opaque; so = (GISTScanOpaque) scan->opaque;
if (ItemPointerIsValid(&scan->currentItemData) == false)
{
/* Being asked to fetch the first entry, so start at the root */
Assert(so->curbuf == InvalidBuffer);
so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);
}
b = ReadBuffer(s->indexRelation, GISTP_ROOT); p = BufferGetPage(so->curbuf);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p); po = (GISTPageOpaque) PageGetSpecialPointer(p);
for (;;) if (ItemPointerIsValid(&scan->currentItemData) == false)
{ {
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir)) if (ScanDirectionIsBackward(dir))
n = gistfindnext(s, p, maxoff, dir); n = PageGetMaxOffsetNumber(p);
else else
n = gistfindnext(s, p, FirstOffsetNumber, dir); n = FirstOffsetNumber;
while (n < FirstOffsetNumber || n > maxoff)
{
stk = so->s_stack;
if (stk == NULL)
{
ReleaseBuffer(b);
return false;
}
b = ReleaseAndReadBuffer(b, s->indexRelation, stk->gs_blk);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(stk->gs_child);
else
n = OffsetNumberNext(stk->gs_child);
so->s_stack = stk->gs_parent;
pfree(stk);
n = gistfindnext(s, p, n, dir);
}
if (po->flags & F_LEAF)
{
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
return true;
}
else
{
stk = (GISTSTACK *) palloc(sizeof(GISTSTACK));
stk->gs_child = n;
stk->gs_blk = BufferGetBlockNumber(b);
stk->gs_parent = so->s_stack;
so->s_stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
blk = ItemPointerGetBlockNumber(&(it->t_tid));
b = ReleaseAndReadBuffer(b, s->indexRelation, blk);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
}
} }
}
static bool
gistnext(IndexScanDesc s, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
OffsetNumber maxoff;
GISTPageOpaque po;
GISTScanOpaque so;
GISTSTACK *stk;
BlockNumber blk;
IndexTuple it;
so = (GISTScanOpaque) s->opaque;
blk = ItemPointerGetBlockNumber(&(s->currentItemData));
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
if (ScanDirectionIsForward(dir))
n = OffsetNumberNext(n);
else else
n = OffsetNumberPrev(n); {
n = ItemPointerGetOffsetNumber(&(scan->currentItemData));
b = ReadBuffer(s->indexRelation, blk); if (ScanDirectionIsBackward(dir))
p = BufferGetPage(b); n = OffsetNumberPrev(n);
po = (GISTPageOpaque) PageGetSpecialPointer(p); else
n = OffsetNumberNext(n);
}
for (;;) for (;;)
{ {
maxoff = PageGetMaxOffsetNumber(p); n = gistfindnext(scan, n, dir);
n = gistfindnext(s, p, n, dir);
while (n < FirstOffsetNumber || n > maxoff) if (!OffsetNumberIsValid(n))
{ {
stk = so->s_stack; /*
if (stk == NULL) * We ran out of matching index entries on the current
* page, so pop the top stack entry and use it to continue
* the search.
*/
/* If we're out of stack entries, we're done */
if (so->stack == NULL)
{ {
ReleaseBuffer(b); ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
return false; return false;
} }
b = ReleaseAndReadBuffer(b, s->indexRelation, stk->gs_blk); stk = so->stack;
p = BufferGetPage(b); so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block);
p = BufferGetPage(so->curbuf);
po = (GISTPageOpaque) PageGetSpecialPointer(p); po = (GISTPageOpaque) PageGetSpecialPointer(p);
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir)) if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(stk->gs_child); n = OffsetNumberPrev(stk->offset);
else else
n = OffsetNumberNext(stk->gs_child); n = OffsetNumberNext(stk->offset);
so->s_stack = stk->gs_parent; so->stack = stk->parent;
pfree(stk); pfree(stk);
n = gistfindnext(s, p, n, dir); continue;
} }
if (po->flags & F_LEAF) if (po->flags & F_LEAF)
{ {
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); /*
* We've found a matching index entry in a leaf page, so
* return success. Note that we keep "curbuf" pinned so
* that we can efficiently resume the index scan later.
*/
ItemPointerSet(&(scan->currentItemData),
BufferGetBlockNumber(so->curbuf), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
scan->xs_ctup.t_self = it->t_tid;
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
return true; return true;
} }
else else
{ {
/*
* We've found an entry in an internal node whose key is
* consistent with the search key, so continue the search
* in the pointed-to child node (i.e. we search depth
* first). Push the current node onto the stack so we
* resume searching from this node later.
*/
BlockNumber child_block;
stk = (GISTSTACK *) palloc(sizeof(GISTSTACK)); stk = (GISTSTACK *) palloc(sizeof(GISTSTACK));
stk->gs_child = n; stk->offset = n;
stk->gs_blk = BufferGetBlockNumber(b); stk->block = BufferGetBlockNumber(so->curbuf);
stk->gs_parent = so->s_stack; stk->parent = so->stack;
so->s_stack = stk; so->stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
blk = ItemPointerGetBlockNumber(&(it->t_tid)); child_block = ItemPointerGetBlockNumber(&(it->t_tid));
b = ReleaseAndReadBuffer(b, s->indexRelation, blk); so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
p = BufferGetPage(b); child_block);
p = BufferGetPage(so->curbuf);
po = (GISTPageOpaque) PageGetSpecialPointer(p); po = (GISTPageOpaque) PageGetSpecialPointer(p);
if (ScanDirectionIsBackward(dir)) if (ScanDirectionIsBackward(dir))
...@@ -244,19 +228,34 @@ gistnext(IndexScanDesc s, ScanDirection dir) ...@@ -244,19 +228,34 @@ gistnext(IndexScanDesc s, ScanDirection dir)
} }
} }
/* Similar to index_keytest, but decompresses the key in the IndexTuple */ /*
* Similar to index_keytest, but first decompress the key in the
* IndexTuple before passing it to the sk_func (and we have previously
* overwritten the sk_func to use the user-defined Consistent method,
* so we actually invoke that). Note that this function is always
* invoked in a short-lived memory context, so we don't need to worry
* about cleaning up allocated memory (either here or in the
* implementation of any Consistent methods).
*/
static bool static bool
gistindex_keytest(IndexTuple tuple, gistindex_keytest(IndexTuple tuple,
int scanKeySize, IndexScanDesc scan,
ScanKey key,
GISTSTATE *giststate,
Relation r,
Page p,
OffsetNumber offset) OffsetNumber offset)
{ {
int keySize = scan->numberOfKeys;
ScanKey key = scan->keyData;
Relation r = scan->indexRelation;
GISTScanOpaque so;
Page p;
GISTSTATE *giststate;
so = (GISTScanOpaque) scan->opaque;
giststate = so->giststate;
p = BufferGetPage(so->curbuf);
IncrIndexProcessed(); IncrIndexProcessed();
while (scanKeySize > 0) while (keySize > 0)
{ {
Datum datum; Datum datum;
bool isNull; bool isNull;
...@@ -297,53 +296,57 @@ gistindex_keytest(IndexTuple tuple, ...@@ -297,53 +296,57 @@ gistindex_keytest(IndexTuple tuple,
Int32GetDatum(key->sk_strategy), Int32GetDatum(key->sk_strategy),
ObjectIdGetDatum(key->sk_subtype)); ObjectIdGetDatum(key->sk_subtype));
/* if index datum had to be decompressed, free it */
if (de.key != datum && !isAttByVal(giststate, key->sk_attno - 1))
if (DatumGetPointer(de.key) != NULL)
pfree(DatumGetPointer(de.key));
if (!DatumGetBool(test)) if (!DatumGetBool(test))
return false; return false;
scanKeySize--; keySize--;
key++; key++;
} }
return true; return true;
} }
/*
* Return the offset of the first index entry on the current page, at
* or beyond offset 'n' in the scan direction, that is consistent with
* the search key. If there are no more consistent entries, return
* InvalidOffsetNumber.
*/
static OffsetNumber static OffsetNumber
gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir) gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
{ {
OffsetNumber maxoff; OffsetNumber maxoff;
IndexTuple it; IndexTuple it;
GISTPageOpaque po; GISTPageOpaque po;
GISTScanOpaque so; GISTScanOpaque so;
GISTSTATE *giststate; MemoryContext oldcxt;
Page p;
so = (GISTScanOpaque) scan->opaque;
p = BufferGetPage(so->curbuf);
maxoff = PageGetMaxOffsetNumber(p); maxoff = PageGetMaxOffsetNumber(p);
po = (GISTPageOpaque) PageGetSpecialPointer(p); po = (GISTPageOpaque) PageGetSpecialPointer(p);
so = (GISTScanOpaque) s->opaque;
giststate = so->giststate; /*
* Make sure we're in a short-lived memory context when we invoke
* a user-supplied GiST method in gistindex_keytest(), so we don't
* leak memory
*/
oldcxt = MemoryContextSwitchTo(so->tempCxt);
/* /*
* If we modified the index during the scan, we may have a pointer to * If we modified the index during the scan, we may have a pointer to
* a ghost tuple, before the scan. If this is the case, back up one. * a ghost tuple, before the scan. If this is the case, back up one.
*/ */
if (so->flags & GS_CURBEFORE)
if (so->s_flags & GS_CURBEFORE)
{ {
so->s_flags &= ~GS_CURBEFORE; so->flags &= ~GS_CURBEFORE;
n = OffsetNumberPrev(n); n = OffsetNumberPrev(n);
} }
while (n >= FirstOffsetNumber && n <= maxoff) while (n >= FirstOffsetNumber && n <= maxoff)
{ {
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
if (gistindex_keytest(it, if (gistindex_keytest(it, scan, n))
s->numberOfKeys, s->keyData, giststate,
s->indexRelation, p, n))
break; break;
if (ScanDirectionIsBackward(dir)) if (ScanDirectionIsBackward(dir))
...@@ -352,28 +355,16 @@ gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir) ...@@ -352,28 +355,16 @@ gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
n = OffsetNumberNext(n); n = OffsetNumberNext(n);
} }
return n; MemoryContextSwitchTo(oldcxt);
} MemoryContextReset(so->tempCxt);
static bool
gistscancache(IndexScanDesc s, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
IndexTuple it;
if (!(ScanDirectionIsNoMovement(dir) /*
&& ItemPointerIsValid(&(s->currentItemData)))) * If we found a matching entry, return its offset; otherwise
return false; * return InvalidOffsetNumber to inform the caller to go to the
* next page.
b = ReadBuffer(s->indexRelation, */
ItemPointerGetBlockNumber(&(s->currentItemData))); if (n >= FirstOffsetNumber && n <= maxoff)
p = BufferGetPage(b); return n;
n = ItemPointerGetOffsetNumber(&(s->currentItemData)); else
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); return InvalidOffsetNumber;
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
return true;
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.56 2004/12/31 21:59:10 pgsql Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.57 2005/05/17 00:59:30 neilc Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,28 +17,29 @@ ...@@ -17,28 +17,29 @@
#include "access/genam.h" #include "access/genam.h"
#include "access/gist.h" #include "access/gist.h"
#include "access/gistscan.h" #include "access/gistscan.h"
#include "utils/memutils.h"
#include "utils/resowner.h" #include "utils/resowner.h"
/* routines defined and used here */ /* routines defined and used here */
static void gistregscan(IndexScanDesc s); static void gistregscan(IndexScanDesc scan);
static void gistdropscan(IndexScanDesc s); static void gistdropscan(IndexScanDesc scan);
static void gistadjone(IndexScanDesc s, int op, BlockNumber blkno, static void gistadjone(IndexScanDesc scan, int op, BlockNumber blkno,
OffsetNumber offnum); OffsetNumber offnum);
static void adjuststack(GISTSTACK *stk, BlockNumber blkno); static void adjuststack(GISTSTACK *stk, BlockNumber blkno);
static void adjustiptr(IndexScanDesc s, ItemPointer iptr, static void adjustiptr(IndexScanDesc scan, ItemPointer iptr,
int op, BlockNumber blkno, OffsetNumber offnum); int op, BlockNumber blkno, OffsetNumber offnum);
static void gistfreestack(GISTSTACK *s);
/* /*
* Whenever we start a GiST scan in a backend, we register it in private * Whenever we start a GiST scan in a backend, we register it in
* space. Then if the GiST index gets updated, we check all registered * private space. Then if the GiST index gets updated, we check all
* scans and adjust them if the tuple they point at got moved by the * registered scans and adjust them if the tuple they point at got
* update. We only need to do this in private space, because when we update * moved by the update. We only need to do this in private space,
* an GiST we have a write lock on the tree, so no other process can have * because when we update an GiST we have a write lock on the tree, so
* any locks at all on it. A single transaction can have write and read * no other process can have any locks at all on it. A single
* locks on the same object, so that's why we need to handle this case. * transaction can have write and read locks on the same object, so
* that's why we need to handle this case.
*/ */
typedef struct GISTScanListData typedef struct GISTScanListData
{ {
IndexScanDesc gsl_scan; IndexScanDesc gsl_scan;
...@@ -57,65 +58,77 @@ gistbeginscan(PG_FUNCTION_ARGS) ...@@ -57,65 +58,77 @@ gistbeginscan(PG_FUNCTION_ARGS)
Relation r = (Relation) PG_GETARG_POINTER(0); Relation r = (Relation) PG_GETARG_POINTER(0);
int nkeys = PG_GETARG_INT32(1); int nkeys = PG_GETARG_INT32(1);
ScanKey key = (ScanKey) PG_GETARG_POINTER(2); ScanKey key = (ScanKey) PG_GETARG_POINTER(2);
IndexScanDesc s; IndexScanDesc scan;
s = RelationGetIndexScan(r, nkeys, key); scan = RelationGetIndexScan(r, nkeys, key);
gistregscan(scan);
gistregscan(s); PG_RETURN_POINTER(scan);
PG_RETURN_POINTER(s);
} }
Datum Datum
gistrescan(PG_FUNCTION_ARGS) gistrescan(PG_FUNCTION_ARGS)
{ {
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanKey key = (ScanKey) PG_GETARG_POINTER(1); ScanKey key = (ScanKey) PG_GETARG_POINTER(1);
GISTScanOpaque p; GISTScanOpaque so;
int i; int i;
/* /*
* Clear all the pointers. * Clear all the pointers.
*/ */
ItemPointerSetInvalid(&s->currentItemData); ItemPointerSetInvalid(&scan->currentItemData);
ItemPointerSetInvalid(&s->currentMarkData); ItemPointerSetInvalid(&scan->currentMarkData);
p = (GISTScanOpaque) s->opaque; so = (GISTScanOpaque) scan->opaque;
if (p != NULL) if (so != NULL)
{ {
/* rescan an existing indexscan --- reset state */ /* rescan an existing indexscan --- reset state */
gistfreestack(p->s_stack); gistfreestack(so->stack);
gistfreestack(p->s_markstk); gistfreestack(so->markstk);
p->s_stack = p->s_markstk = NULL; so->stack = so->markstk = NULL;
p->s_flags = 0x0; so->flags = 0x0;
/* drop pins on buffers -- no locks held */
if (BufferIsValid(so->curbuf))
{
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
}
if (BufferIsValid(so->markbuf))
{
ReleaseBuffer(so->markbuf);
so->markbuf = InvalidBuffer;
}
} }
else else
{ {
/* initialize opaque data */ /* initialize opaque data */
p = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData));
p->s_stack = p->s_markstk = NULL; so->stack = so->markstk = NULL;
p->s_flags = 0x0; so->flags = 0x0;
s->opaque = p; so->tempCxt = createTempGistContext();
p->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); so->curbuf = so->markbuf = InvalidBuffer;
initGISTstate(p->giststate, s->indexRelation); so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
initGISTstate(so->giststate, scan->indexRelation);
scan->opaque = so;
} }
/* Update scan key, if a new one is given */ /* Update scan key, if a new one is given */
if (key && s->numberOfKeys > 0) if (key && scan->numberOfKeys > 0)
{ {
memmove(s->keyData, memmove(scan->keyData, key,
key, scan->numberOfKeys * sizeof(ScanKeyData));
s->numberOfKeys * sizeof(ScanKeyData));
/* /*
* Modify the scan key so that the Consistent function is called * Modify the scan key so that all the Consistent method is
* for all comparisons. The original operator is passed to the * called for all comparisons. The original operator is passed
* Consistent function in the form of its strategy number, which * to the Consistent function in the form of its strategy
* is available from the sk_strategy field, and its subtype from * number, which is available from the sk_strategy field, and
* the sk_subtype field. * its subtype from the sk_subtype field.
*/ */
for (i = 0; i < s->numberOfKeys; i++) for (i = 0; i < scan->numberOfKeys; i++)
s->keyData[i].sk_func = p->giststate->consistentFn[s->keyData[i].sk_attno - 1]; scan->keyData[i].sk_func = so->giststate->consistentFn[scan->keyData[i].sk_attno - 1];
} }
PG_RETURN_VOID(); PG_RETURN_VOID();
...@@ -124,35 +137,47 @@ gistrescan(PG_FUNCTION_ARGS) ...@@ -124,35 +137,47 @@ gistrescan(PG_FUNCTION_ARGS)
Datum Datum
gistmarkpos(PG_FUNCTION_ARGS) gistmarkpos(PG_FUNCTION_ARGS)
{ {
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque p; GISTScanOpaque so;
GISTSTACK *o, GISTSTACK *o,
*n, *n,
*tmp; *tmp;
s->currentMarkData = s->currentItemData; scan->currentMarkData = scan->currentItemData;
p = (GISTScanOpaque) s->opaque; so = (GISTScanOpaque) scan->opaque;
if (p->s_flags & GS_CURBEFORE) if (so->flags & GS_CURBEFORE)
p->s_flags |= GS_MRKBEFORE; so->flags |= GS_MRKBEFORE;
else else
p->s_flags &= ~GS_MRKBEFORE; so->flags &= ~GS_MRKBEFORE;
o = NULL; o = NULL;
n = p->s_stack; n = so->stack;
/* copy the parent stack from the current item data */ /* copy the parent stack from the current item data */
while (n != NULL) while (n != NULL)
{ {
tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK)); tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK));
tmp->gs_child = n->gs_child; tmp->offset = n->offset;
tmp->gs_blk = n->gs_blk; tmp->block = n->block;
tmp->gs_parent = o; tmp->parent = o;
o = tmp; o = tmp;
n = n->gs_parent; n = n->parent;
} }
gistfreestack(p->s_markstk); gistfreestack(so->markstk);
p->s_markstk = o; so->markstk = o;
/* Update markbuf: make sure to bump ref count on curbuf */
if (BufferIsValid(so->markbuf))
{
ReleaseBuffer(so->markbuf);
so->markbuf = InvalidBuffer;
}
if (BufferIsValid(so->curbuf))
{
IncrBufferRefCount(so->curbuf);
so->markbuf = so->curbuf;
}
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -160,35 +185,47 @@ gistmarkpos(PG_FUNCTION_ARGS) ...@@ -160,35 +185,47 @@ gistmarkpos(PG_FUNCTION_ARGS)
Datum Datum
gistrestrpos(PG_FUNCTION_ARGS) gistrestrpos(PG_FUNCTION_ARGS)
{ {
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque p; GISTScanOpaque so;
GISTSTACK *o, GISTSTACK *o,
*n, *n,
*tmp; *tmp;
s->currentItemData = s->currentMarkData; scan->currentItemData = scan->currentMarkData;
p = (GISTScanOpaque) s->opaque; so = (GISTScanOpaque) scan->opaque;
if (p->s_flags & GS_MRKBEFORE) if (so->flags & GS_MRKBEFORE)
p->s_flags |= GS_CURBEFORE; so->flags |= GS_CURBEFORE;
else else
p->s_flags &= ~GS_CURBEFORE; so->flags &= ~GS_CURBEFORE;
o = NULL; o = NULL;
n = p->s_markstk; n = so->markstk;
/* copy the parent stack from the current item data */ /* copy the parent stack from the current item data */
while (n != NULL) while (n != NULL)
{ {
tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK)); tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK));
tmp->gs_child = n->gs_child; tmp->offset = n->offset;
tmp->gs_blk = n->gs_blk; tmp->block = n->block;
tmp->gs_parent = o; tmp->parent = o;
o = tmp; o = tmp;
n = n->gs_parent; n = n->parent;
} }
gistfreestack(p->s_stack); gistfreestack(so->stack);
p->s_stack = o; so->stack = o;
/* Update curbuf: be sure to bump ref count on markbuf */
if (BufferIsValid(so->curbuf))
{
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
}
if (BufferIsValid(so->markbuf))
{
IncrBufferRefCount(so->markbuf);
so->curbuf = so->markbuf;
}
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -196,52 +233,57 @@ gistrestrpos(PG_FUNCTION_ARGS) ...@@ -196,52 +233,57 @@ gistrestrpos(PG_FUNCTION_ARGS)
Datum Datum
gistendscan(PG_FUNCTION_ARGS) gistendscan(PG_FUNCTION_ARGS)
{ {
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque p; GISTScanOpaque so;
p = (GISTScanOpaque) s->opaque; so = (GISTScanOpaque) scan->opaque;
if (p != NULL) if (so != NULL)
{ {
gistfreestack(p->s_stack); gistfreestack(so->stack);
gistfreestack(p->s_markstk); gistfreestack(so->markstk);
if (p->giststate != NULL) if (so->giststate != NULL)
freeGISTstate(p->giststate); freeGISTstate(so->giststate);
pfree(s->opaque); /* drop pins on buffers -- we aren't holding any locks */
if (BufferIsValid(so->curbuf))
ReleaseBuffer(so->curbuf);
if (BufferIsValid(so->markbuf))
ReleaseBuffer(so->markbuf);
MemoryContextDelete(so->tempCxt);
pfree(scan->opaque);
} }
gistdropscan(s); gistdropscan(scan);
/* XXX don't unset read lock -- two-phase locking */
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
static void static void
gistregscan(IndexScanDesc s) gistregscan(IndexScanDesc scan)
{ {
GISTScanList l; GISTScanList l;
l = (GISTScanList) palloc(sizeof(GISTScanListData)); l = (GISTScanList) palloc(sizeof(GISTScanListData));
l->gsl_scan = s; l->gsl_scan = scan;
l->gsl_owner = CurrentResourceOwner; l->gsl_owner = CurrentResourceOwner;
l->gsl_next = GISTScans; l->gsl_next = GISTScans;
GISTScans = l; GISTScans = l;
} }
static void static void
gistdropscan(IndexScanDesc s) gistdropscan(IndexScanDesc scan)
{ {
GISTScanList l; GISTScanList l;
GISTScanList prev; GISTScanList prev;
prev = NULL; prev = NULL;
for (l = GISTScans; l != NULL && l->gsl_scan != s; l = l->gsl_next) for (l = GISTScans; l != NULL && l->gsl_scan != scan; l = l->gsl_next)
prev = l; prev = l;
if (l == NULL) if (l == NULL)
elog(ERROR, "GiST scan list corrupted -- could not find 0x%p", elog(ERROR, "GiST scan list corrupted -- could not find 0x%p",
(void *) s); (void *) scan);
if (prev == NULL) if (prev == NULL)
GISTScans = l->gsl_next; GISTScans = l->gsl_next;
...@@ -313,22 +355,22 @@ gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum) ...@@ -313,22 +355,22 @@ gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum)
* update. If so, we make the change here. * update. If so, we make the change here.
*/ */
static void static void
gistadjone(IndexScanDesc s, gistadjone(IndexScanDesc scan,
int op, int op,
BlockNumber blkno, BlockNumber blkno,
OffsetNumber offnum) OffsetNumber offnum)
{ {
GISTScanOpaque so; GISTScanOpaque so;
adjustiptr(s, &(s->currentItemData), op, blkno, offnum); adjustiptr(scan, &(scan->currentItemData), op, blkno, offnum);
adjustiptr(s, &(s->currentMarkData), op, blkno, offnum); adjustiptr(scan, &(scan->currentMarkData), op, blkno, offnum);
so = (GISTScanOpaque) s->opaque; so = (GISTScanOpaque) scan->opaque;
if (op == GISTOP_SPLIT) if (op == GISTOP_SPLIT)
{ {
adjuststack(so->s_stack, blkno); adjuststack(so->stack, blkno);
adjuststack(so->s_markstk, blkno); adjuststack(so->markstk, blkno);
} }
} }
...@@ -340,7 +382,7 @@ gistadjone(IndexScanDesc s, ...@@ -340,7 +382,7 @@ gistadjone(IndexScanDesc s,
* the same page. * the same page.
*/ */
static void static void
adjustiptr(IndexScanDesc s, adjustiptr(IndexScanDesc scan,
ItemPointer iptr, ItemPointer iptr,
int op, int op,
BlockNumber blkno, BlockNumber blkno,
...@@ -354,7 +396,7 @@ adjustiptr(IndexScanDesc s, ...@@ -354,7 +396,7 @@ adjustiptr(IndexScanDesc s,
if (ItemPointerGetBlockNumber(iptr) == blkno) if (ItemPointerGetBlockNumber(iptr) == blkno)
{ {
curoff = ItemPointerGetOffsetNumber(iptr); curoff = ItemPointerGetOffsetNumber(iptr);
so = (GISTScanOpaque) s->opaque; so = (GISTScanOpaque) scan->opaque;
switch (op) switch (op)
{ {
...@@ -362,7 +404,6 @@ adjustiptr(IndexScanDesc s, ...@@ -362,7 +404,6 @@ adjustiptr(IndexScanDesc s,
/* back up one if we need to */ /* back up one if we need to */
if (curoff >= offnum) if (curoff >= offnum)
{ {
if (curoff > FirstOffsetNumber) if (curoff > FirstOffsetNumber)
{ {
/* just adjust the item pointer */ /* just adjust the item pointer */
...@@ -375,10 +416,10 @@ adjustiptr(IndexScanDesc s, ...@@ -375,10 +416,10 @@ adjustiptr(IndexScanDesc s,
* tuple * tuple
*/ */
ItemPointerSet(iptr, blkno, FirstOffsetNumber); ItemPointerSet(iptr, blkno, FirstOffsetNumber);
if (iptr == &(s->currentItemData)) if (iptr == &(scan->currentItemData))
so->s_flags |= GS_CURBEFORE; so->flags |= GS_CURBEFORE;
else else
so->s_flags |= GS_MRKBEFORE; so->flags |= GS_MRKBEFORE;
} }
} }
break; break;
...@@ -386,10 +427,10 @@ adjustiptr(IndexScanDesc s, ...@@ -386,10 +427,10 @@ adjustiptr(IndexScanDesc s,
case GISTOP_SPLIT: case GISTOP_SPLIT:
/* back to start of page on split */ /* back to start of page on split */
ItemPointerSet(iptr, blkno, FirstOffsetNumber); ItemPointerSet(iptr, blkno, FirstOffsetNumber);
if (iptr == &(s->currentItemData)) if (iptr == &(scan->currentItemData))
so->s_flags &= ~GS_CURBEFORE; so->flags &= ~GS_CURBEFORE;
else else
so->s_flags &= ~GS_MRKBEFORE; so->flags &= ~GS_MRKBEFORE;
break; break;
default: default:
...@@ -417,9 +458,20 @@ adjuststack(GISTSTACK *stk, BlockNumber blkno) ...@@ -417,9 +458,20 @@ adjuststack(GISTSTACK *stk, BlockNumber blkno)
{ {
while (stk != NULL) while (stk != NULL)
{ {
if (stk->gs_blk == blkno) if (stk->block == blkno)
stk->gs_child = FirstOffsetNumber; stk->offset = FirstOffsetNumber;
stk = stk->parent;
}
}
stk = stk->gs_parent; static void
gistfreestack(GISTSTACK *s)
{
while (s != NULL)
{
GISTSTACK *p = s->parent;
pfree(s);
s = p;
} }
} }
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/gist.h,v 1.44 2005/03/27 23:53:04 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.45 2005/05/17 00:59:30 neilc Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -54,13 +54,21 @@ typedef GISTPageOpaqueData *GISTPageOpaque; ...@@ -54,13 +54,21 @@ typedef GISTPageOpaqueData *GISTPageOpaque;
#define GIST_LEAF(entry) (((GISTPageOpaque) PageGetSpecialPointer((entry)->page))->flags & F_LEAF) #define GIST_LEAF(entry) (((GISTPageOpaque) PageGetSpecialPointer((entry)->page))->flags & F_LEAF)
/* /*
* When we descend a tree, we keep a stack of parent pointers. * When we descend a tree, we keep a stack of parent pointers. This
* allows us to follow a chain of internal node pointers until we reach
* a leaf node, and then back up the stack to re-examine the internal
* nodes.
*
* 'parent' is the previous stack entry -- i.e. the node we arrived
* from. 'block' is the node's block number. 'offset' is the offset in
* the node's page that we stopped at (i.e. we followed the child
* pointer located at the specified offset).
*/ */
typedef struct GISTSTACK typedef struct GISTSTACK
{ {
struct GISTSTACK *gs_parent; struct GISTSTACK *parent;
OffsetNumber gs_child; OffsetNumber offset;
BlockNumber gs_blk; BlockNumber block;
} GISTSTACK; } GISTSTACK;
typedef struct GISTSTATE typedef struct GISTSTATE
...@@ -84,10 +92,13 @@ typedef struct GISTSTATE ...@@ -84,10 +92,13 @@ typedef struct GISTSTATE
*/ */
typedef struct GISTScanOpaqueData typedef struct GISTScanOpaqueData
{ {
struct GISTSTACK *s_stack; GISTSTACK *stack;
struct GISTSTACK *s_markstk; GISTSTACK *markstk;
uint16 s_flags; uint16 flags;
struct GISTSTATE *giststate; GISTSTATE *giststate;
MemoryContext tempCxt;
Buffer curbuf;
Buffer markbuf;
} GISTScanOpaqueData; } GISTScanOpaqueData;
typedef GISTScanOpaqueData *GISTScanOpaque; typedef GISTScanOpaqueData *GISTScanOpaque;
...@@ -101,8 +112,8 @@ typedef GISTScanOpaqueData *GISTScanOpaque; ...@@ -101,8 +112,8 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
#define GS_CURBEFORE ((uint16) (1 << 0)) #define GS_CURBEFORE ((uint16) (1 << 0))
#define GS_MRKBEFORE ((uint16) (1 << 1)) #define GS_MRKBEFORE ((uint16) (1 << 1))
/* root page of a gist */ /* root page of a gist index */
#define GISTP_ROOT 0 #define GIST_ROOT_BLKNO 0
/* /*
* When we update a relation on which we're doing a scan, we need to * When we update a relation on which we're doing a scan, we need to
...@@ -183,7 +194,6 @@ extern Datum gistbuild(PG_FUNCTION_ARGS); ...@@ -183,7 +194,6 @@ extern Datum gistbuild(PG_FUNCTION_ARGS);
extern Datum gistinsert(PG_FUNCTION_ARGS); extern Datum gistinsert(PG_FUNCTION_ARGS);
extern Datum gistbulkdelete(PG_FUNCTION_ARGS); extern Datum gistbulkdelete(PG_FUNCTION_ARGS);
extern void _gistdump(Relation r); extern void _gistdump(Relation r);
extern void gistfreestack(GISTSTACK *s);
extern void initGISTstate(GISTSTATE *giststate, Relation index); extern void initGISTstate(GISTSTATE *giststate, Relation index);
extern void freeGISTstate(GISTSTATE *giststate); extern void freeGISTstate(GISTSTATE *giststate);
extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
...@@ -193,6 +203,7 @@ extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, ...@@ -193,6 +203,7 @@ extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
extern void gist_redo(XLogRecPtr lsn, XLogRecord *record); extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
extern void gist_undo(XLogRecPtr lsn, XLogRecord *record); extern void gist_undo(XLogRecPtr lsn, XLogRecord *record);
extern void gist_desc(char *buf, uint8 xl_info, char *rec); extern void gist_desc(char *buf, uint8 xl_info, char *rec);
extern MemoryContext createTempGistContext(void);
/* gistget.c */ /* gistget.c */
extern Datum gistgettuple(PG_FUNCTION_ARGS); extern Datum gistgettuple(PG_FUNCTION_ARGS);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册