diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 31464ab2bc3010a99fce07fdae306bec722d6fac..3791b25eb60d3ed401cd653e5c2d1c3c0f1110df 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.10 2008/01/01 19:45:46 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.10.2.1 2008/04/22 17:54:19 teodor Exp $ *------------------------------------------------------------------------- */ @@ -27,58 +27,16 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off) /* page was deleted by concurrent vacuum */ return false; - if (*off > maxoff || *off == InvalidOffsetNumber) - res = -1; - else - res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); + /* + * scan page to find equal or first greater value + */ - if (res == 0) + for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++) { - /* page isn't changed */ - return true; - } - else if (res > 0) - { - /* - * some items was added before our position, look further to find it - * or first greater - */ - - (*off)++; - for (; *off <= maxoff; (*off)++) - { - res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); - - if (res == 0) - return true; - - if (res < 0) - { - (*off)--; - return true; - } - } - } - else - { - /* - * some items was deleted before our position, look from begining to - * find it or first greater - */ - - for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++) - { - res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); - - if (res == 0) - return true; + res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); - if (res < 0) - { - (*off)--; - return true; - } - } + if (res <= 0) + return true; } return false; @@ -89,119 +47,87 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off) * Stop* functions unlock buffer (but don't release!) */ static void -startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall) +startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) { + GinBtreeData btreeEntry; + GinBtreeStack *stackEntry; + Page page; + bool needUnlock = TRUE; + if (entry->master != NULL) { entry->isFinished = entry->master->isFinished; return; } - if (firstCall) + /* + * we should find entry, and begin scan of posting tree + * or just store posting list in memory + */ + + prepareEntryScan(&btreeEntry, index, entry->entry, ginstate); + btreeEntry.searchMode = TRUE; + stackEntry = ginFindLeafPage(&btreeEntry, NULL); + page = BufferGetPage(stackEntry->buffer); + + entry->isFinished = TRUE; + entry->buffer = InvalidBuffer; + entry->offset = InvalidOffsetNumber; + entry->list = NULL; + entry->nlist = 0; + entry->reduceResult = FALSE; + entry->predictNumberResult = 0; + + if (btreeEntry.findItem(&btreeEntry, stackEntry)) { - /* - * at first call we should find entry, and begin scan of posting tree - * or just store posting list in memory - */ - GinBtreeData btreeEntry; - GinBtreeStack *stackEntry; - Page page; - bool needUnlock = TRUE; - - prepareEntryScan(&btreeEntry, index, entry->entry, ginstate); - btreeEntry.searchMode = TRUE; - stackEntry = ginFindLeafPage(&btreeEntry, NULL); - page = BufferGetPage(stackEntry->buffer); - - entry->isFinished = TRUE; - entry->buffer = InvalidBuffer; - entry->offset = InvalidOffsetNumber; - entry->list = NULL; - entry->nlist = 0; - entry->reduceResult = FALSE; - entry->predictNumberResult = 0; - - if (btreeEntry.findItem(&btreeEntry, stackEntry)) - { - IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); - - if (GinIsPostingTree(itup)) - { - BlockNumber rootPostingTree = GinGetPostingTree(itup); - GinPostingTreeScan *gdi; - Page page; - - LockBuffer(stackEntry->buffer, GIN_UNLOCK); - needUnlock = FALSE; - gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); - - entry->buffer = scanBeginPostingTree(gdi); - IncrBufferRefCount(entry->buffer); - - page = BufferGetPage(entry->buffer); - entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff; + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); - freeGinBtreeStack(gdi->stack); - pfree(gdi); - entry->isFinished = FALSE; - } - else if (GinGetNPosting(itup) > 0) - { - entry->nlist = GinGetNPosting(itup); - entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist); - memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist); - entry->isFinished = FALSE; - } - } + if (GinIsPostingTree(itup)) + { + BlockNumber rootPostingTree = GinGetPostingTree(itup); + GinPostingTreeScan *gdi; + Page page; - if (needUnlock) LockBuffer(stackEntry->buffer, GIN_UNLOCK); - freeGinBtreeStack(stackEntry); - } - else if (entry->buffer != InvalidBuffer) - { - /* we should find place where we was stopped */ - BlockNumber blkno; - Page page; - - LockBuffer(entry->buffer, GIN_SHARE); + needUnlock = FALSE; + gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); - if (!ItemPointerIsValid(&entry->curItem)) - /* start position */ - return; - Assert(entry->offset != InvalidOffsetNumber); + entry->buffer = scanBeginPostingTree(gdi); + /* + * We keep buffer pinned because we need to prevent deletition + * page during scan. See GIN's vacuum implementation. RefCount + * is increased to keep buffer pinned after freeGinBtreeStack() call. + */ + IncrBufferRefCount(entry->buffer); - page = BufferGetPage(entry->buffer); + page = BufferGetPage(entry->buffer); + entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff; - /* try to find curItem in current buffer */ - if (findItemInPage(page, &entry->curItem, &entry->offset)) - return; + /* + * Keep page content in memory to prevent durable page locking + */ + entry->list = (ItemPointerData *) palloc( BLCKSZ ); + entry->nlist = GinPageGetOpaque(page)->maxoff; + memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), + GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); - /* walk to right */ - while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber) - { LockBuffer(entry->buffer, GIN_UNLOCK); - entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno); - LockBuffer(entry->buffer, GIN_SHARE); - page = BufferGetPage(entry->buffer); - - entry->offset = InvalidOffsetNumber; - if (findItemInPage(page, &entry->curItem, &entry->offset)) - return; + freeGinBtreeStack(gdi->stack); + pfree(gdi); + entry->isFinished = FALSE; + } + else if (GinGetNPosting(itup) > 0) + { + entry->nlist = GinGetNPosting(itup); + entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist); + memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist); + entry->isFinished = FALSE; } - - /* - * curItem and any greated items was deleted by concurrent vacuum, so - * we finished scan with currrent entry - */ } -} -static void -stopScanEntry(GinScanEntry entry) -{ - if (entry->buffer != InvalidBuffer) - LockBuffer(entry->buffer, GIN_UNLOCK); + if (needUnlock) + LockBuffer(stackEntry->buffer, GIN_UNLOCK); + freeGinBtreeStack(stackEntry); } static void @@ -209,47 +135,38 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key) { uint32 i; + if (!key->firstCall) + return; + for (i = 0; i < key->nentries; i++) - startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall); + startScanEntry(index, ginstate, key->scanEntry + i); - if (key->firstCall) - { - memset(key->entryRes, TRUE, sizeof(bool) * key->nentries); - key->isFinished = FALSE; - key->firstCall = FALSE; + memset(key->entryRes, TRUE, sizeof(bool) * key->nentries); + key->isFinished = FALSE; + key->firstCall = FALSE; - if (GinFuzzySearchLimit > 0) - { - /* - * If all of keys more than threshold we will try to reduce - * result, we hope (and only hope, for intersection operation of - * array our supposition isn't true), that total result will not - * more than minimal predictNumberResult. - */ + if (GinFuzzySearchLimit > 0) + { + /* + * If all of keys more than threshold we will try to reduce + * result, we hope (and only hope, for intersection operation of + * array our supposition isn't true), that total result will not + * more than minimal predictNumberResult. + */ - for (i = 0; i < key->nentries; i++) - if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit) - return; + for (i = 0; i < key->nentries; i++) + if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit) + return; - for (i = 0; i < key->nentries; i++) - if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit) - { - key->scanEntry[i].predictNumberResult /= key->nentries; - key->scanEntry[i].reduceResult = TRUE; - } - } + for (i = 0; i < key->nentries; i++) + if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit) + { + key->scanEntry[i].predictNumberResult /= key->nentries; + key->scanEntry[i].reduceResult = TRUE; + } } } -static void -stopScanKey(GinScanKey key) -{ - uint32 i; - - for (i = 0; i < key->nentries; i++) - stopScanEntry(key->scanEntry + i); -} - static void startScan(IndexScanDesc scan) { @@ -260,44 +177,82 @@ startScan(IndexScanDesc scan) startScanKey(scan->indexRelation, &so->ginstate, so->keys + i); } -static void -stopScan(IndexScanDesc scan) -{ - uint32 i; - GinScanOpaque so = (GinScanOpaque) scan->opaque; - - for (i = 0; i < so->nkeys; i++) - stopScanKey(so->keys + i); -} - - +/* + * Gets next ItemPointer from PostingTree. Note, that we copy + * page into GinScanEntry->list array and unlock page, but keep it pinned + * to prevent interference with vacuum + */ static void entryGetNextItem(Relation index, GinScanEntry entry) { - Page page = BufferGetPage(entry->buffer); + Page page; + BlockNumber blkno; - entry->offset++; - if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber) + for(;;) { - entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset); - } - else - { - BlockNumber blkno = GinPageGetOpaque(page)->rightlink; + entry->offset++; - LockBuffer(entry->buffer, GIN_UNLOCK); - if (blkno == InvalidBlockNumber) + if (entry->offset <= entry->nlist) { - ReleaseBuffer(entry->buffer); - entry->buffer = InvalidBuffer; - entry->isFinished = TRUE; + entry->curItem = entry->list[entry->offset - 1]; + return; } - else + + LockBuffer(entry->buffer, GIN_SHARE); + page = BufferGetPage(entry->buffer); + for(;;) { + /* + * It's needed to go by right link. During that we should refind + * first ItemPointer greater that stored + */ + + blkno = GinPageGetOpaque(page)->rightlink; + + LockBuffer(entry->buffer, GIN_UNLOCK); + if (blkno == InvalidBlockNumber) + { + ReleaseBuffer(entry->buffer); + ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); + entry->buffer = InvalidBuffer; + entry->isFinished = TRUE; + return; + } + entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno); LockBuffer(entry->buffer, GIN_SHARE); + page = BufferGetPage(entry->buffer); + entry->offset = InvalidOffsetNumber; - entryGetNextItem(index, entry); + if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset)) + { + /* + * Found position equal to or greater than stored + */ + entry->nlist = GinPageGetOpaque(page)->maxoff; + memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), + GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); + + LockBuffer(entry->buffer, GIN_UNLOCK); + + if ( !ItemPointerIsValid(&entry->curItem) || + compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 ) + { + /* + * First pages are deleted or empty, or we found exact position, + * so break inner loop and continue outer one. + */ + + break; + } + + /* + * Find greater than entry->curItem position, store it. + */ + entry->curItem = entry->list[entry->offset - 1]; + + return; + } } } } @@ -317,7 +272,7 @@ entryGetItem(Relation index, GinScanEntry entry) entry->isFinished = entry->master->isFinished; entry->curItem = entry->master->curItem; } - else if (entry->list) + else if (!BufferIsValid(entry->buffer)) { entry->offset++; if (entry->offset <= entry->nlist) @@ -501,8 +456,6 @@ gingetmulti(PG_FUNCTION_ARGS) break; } while (*returned_tids < max_tids); - stopScan(scan); - PG_RETURN_BOOL(*returned_tids == max_tids); } @@ -524,7 +477,6 @@ gingettuple(PG_FUNCTION_ARGS) startScan(scan); res = scanGetItem(scan, &scan->xs_ctup.t_self); - stopScan(scan); PG_RETURN_BOOL(res); } diff --git a/src/include/access/gin.h b/src/include/access/gin.h index d27ba66a5970ff59b831a5d97856ccf371682a6c..8e3a25e9d4301b26b66f17941aad177e1a7e5e38 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -4,7 +4,7 @@ * * Copyright (c) 2006-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.16 2008/01/01 19:45:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.16.2.1 2008/04/22 17:54:19 teodor Exp $ *-------------------------------------------------------------------------- */ @@ -356,14 +356,16 @@ typedef struct GinScanEntryData /* entry, got from extractQueryFn */ Datum entry; - /* current ItemPointer to heap, its offset in buffer and buffer */ - ItemPointerData curItem; - OffsetNumber offset; + /* Current page in posting tree */ Buffer buffer; - /* in case of Posing list */ + /* current ItemPointer to heap */ + ItemPointerData curItem; + + /* used for Posting list and one page in Posting tree */ ItemPointerData *list; - uint32 nlist; + uint32 nlist; + OffsetNumber offset; bool isFinished; bool reduceResult;