Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Gpdb
提交
88dc31e3
G
Gpdb
项目概览
Greenplum
/
Gpdb
通知
7
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
G
Gpdb
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
88dc31e3
编写于
2月 23, 2003
作者:
T
Tom Lane
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
First cut at recycling space in btree indexes. Still some rough edges
to fix, but it seems to basically work...
上级
27854915
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
745 addition
and
32 deletion
+745
-32
src/backend/access/common/indextuple.c
src/backend/access/common/indextuple.c
+8
-9
src/backend/access/nbtree/nbtinsert.c
src/backend/access/nbtree/nbtinsert.c
+2
-5
src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtpage.c
+487
-2
src/backend/access/nbtree/nbtree.c
src/backend/access/nbtree/nbtree.c
+57
-6
src/backend/access/nbtree/nbtxlog.c
src/backend/access/nbtree/nbtxlog.c
+172
-3
src/backend/storage/freespace/freespace.c
src/backend/storage/freespace/freespace.c
+10
-2
src/include/access/itup.h
src/include/access/itup.h
+3
-3
src/include/access/nbtree.h
src/include/access/nbtree.h
+6
-2
未找到文件。
src/backend/access/common/indextuple.c
浏览文件 @
88dc31e3
...
...
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.6
3 2002/11/13 00:39:46 momjian
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.6
4 2003/02/23 06:17:12 tgl
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -394,17 +394,16 @@ nocache_index_getattr(IndexTuple tup,
}
/*
* Copies source into target. If *target == NULL, we palloc space; otherwise
* we assume we have space that is already palloc'ed.
* Create a palloc'd copy of an index tuple.
*/
void
CopyIndexTuple
(
IndexTuple
source
,
IndexTuple
*
target
)
IndexTuple
CopyIndexTuple
(
IndexTuple
source
)
{
IndexTuple
result
;
Size
size
;
size
=
IndexTupleSize
(
source
);
if
(
*
target
==
NULL
)
*
target
=
(
IndexTuple
)
palloc
(
size
);
memmove
((
char
*
)
*
target
,
(
char
*
)
source
,
size
);
result
=
(
IndexTuple
)
palloc
(
size
);
memcpy
(
result
,
source
,
size
);
return
result
;
}
src/backend/access/nbtree/nbtinsert.c
浏览文件 @
88dc31e3
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.9
8 2003/02/22 00:45:0
3 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.9
9 2003/02/23 06:17:1
3 tgl Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -58,7 +58,6 @@ static OffsetNumber _bt_findsplitloc(Relation rel, Page page,
static
void
_bt_checksplitloc
(
FindSplitData
*
state
,
OffsetNumber
firstright
,
int
leftfree
,
int
rightfree
,
bool
newitemonleft
,
Size
firstrightitemsz
);
static
Buffer
_bt_getstackbuf
(
Relation
rel
,
BTStack
stack
,
int
access
);
static
void
_bt_pgaddtup
(
Relation
rel
,
Page
page
,
Size
itemsize
,
BTItem
btitem
,
OffsetNumber
itup_off
,
const
char
*
where
);
...
...
@@ -666,7 +665,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
rightoff
;
OffsetNumber
maxoff
;
OffsetNumber
i
;
BTItem
lhikey
;
rbuf
=
_bt_getbuf
(
rel
,
P_NEW
,
BT_WRITE
);
origpage
=
BufferGetPage
(
buf
);
...
...
@@ -730,7 +728,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
itemsz
=
ItemIdGetLength
(
itemid
);
item
=
(
BTItem
)
PageGetItem
(
origpage
,
itemid
);
}
lhikey
=
item
;
if
(
PageAddItem
(
leftpage
,
(
Item
)
item
,
itemsz
,
leftoff
,
LP_USED
)
==
InvalidOffsetNumber
)
elog
(
PANIC
,
"btree: failed to add hikey to the left sibling"
);
...
...
@@ -1262,7 +1259,7 @@ _bt_insert_parent(Relation rel,
*
* Returns InvalidBuffer if item not found (should not happen).
*/
static
Buffer
Buffer
_bt_getstackbuf
(
Relation
rel
,
BTStack
stack
,
int
access
)
{
BlockNumber
blkno
;
...
...
src/backend/access/nbtree/nbtpage.c
浏览文件 @
88dc31e3
...
...
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.6
0 2003/02/22 00:45:04
tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.6
1 2003/02/23 06:17:13
tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
...
...
@@ -24,6 +24,7 @@
#include "access/nbtree.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
...
...
@@ -391,7 +392,38 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
bool
needLock
;
Page
page
;
/* XXX soon: ask FSM about free space */
Assert
(
access
==
BT_WRITE
);
/*
* First see if the FSM knows of any free pages.
*
* We can't trust the FSM's report unreservedly; we have to check
* that the page is still free. (For example, an already-free page
* could have been re-used between the time the last VACUUM scanned
* it and the time the VACUUM made its FSM updates.)
*
* The request size should be more than half of what btvacuumcleanup
* logs as the per-page free space. We use BLCKSZ/2 and BLCKSZ-1
* to try to get some use out of FSM's space management algorithm.
* XXX this needs some more thought...
*/
for
(;;)
{
blkno
=
GetPageWithFreeSpace
(
&
rel
->
rd_node
,
BLCKSZ
/
2
);
if
(
blkno
==
InvalidBlockNumber
)
break
;
buf
=
ReadBuffer
(
rel
,
blkno
);
LockBuffer
(
buf
,
access
);
page
=
BufferGetPage
(
buf
);
if
(
_bt_page_recyclable
(
page
))
{
/* Okay to use page. Re-initialize and return it */
_bt_pageinit
(
page
,
BufferGetPageSize
(
buf
));
return
buf
;
}
elog
(
DEBUG1
,
"_bt_getbuf: FSM returned nonrecyclable page"
);
_bt_relbuf
(
rel
,
buf
);
}
/*
* Extend the relation by one page.
...
...
@@ -487,6 +519,36 @@ _bt_pageinit(Page page, Size size)
PageInit
(
page
,
size
,
sizeof
(
BTPageOpaqueData
));
}
/*
 *	_bt_page_recyclable() -- Can this existing page be handed out for re-use?
 *
 * Centralizes the "is it safe to re-use this page" policy so that
 * _bt_getbuf and btvacuumcleanup cannot drift apart in their answers.
 *
 * Returns true for a never-initialized (all-zeroes) page, or for a page
 * that is flagged deleted and whose btpo.xact stamp precedes or equals
 * RecentGlobalXmin; returns false in every other case.
 */
bool
_bt_page_recyclable(Page page)
{
	BTPageOpaque pageop;

	/*
	 * An index can contain an all-zeroes page --- for instance, a backend
	 * may extend the relation by one page and then crash before writing
	 * the WAL entry that adds the page.  Such a page is free for the
	 * taking.
	 */
	if (PageIsNew(page))
		return true;

	/*
	 * Anything else must be a deleted page that is old enough that no
	 * process can still be interested in it.
	 */
	pageop = (BTPageOpaque) PageGetSpecialPointer(page);

	if (!P_ISDELETED(pageop))
		return false;

	if (!TransactionIdPrecedesOrEquals(pageop->btpo.xact, RecentGlobalXmin))
		return false;

	return true;
}
/*
* _bt_metaproot() -- Change the root page of the btree.
*
...
...
@@ -605,3 +667,426 @@ _bt_itemdel(Relation rel, Buffer buf, ItemPointer tid)
END_CRIT_SECTION
();
}
/*
 * _bt_pagedel() -- Delete a page from the b-tree.
 *
 * This action unlinks the page from the b-tree structure, removing all
 * pointers leading to it --- but not touching its own left and right links.
 * The page cannot be physically reclaimed right away, since other processes
 * may currently be trying to follow links leading to the page; they have to
 * be allowed to use its right-link to recover.  See nbtree/README.
 *
 * On entry, the target buffer must be pinned and read-locked.  This lock and
 * pin will be dropped before exiting.
 *
 * Parameters:
 *	rel			- the index relation being modified
 *	buf			- buffer holding the page to delete (see locking note above)
 *	vacuum_full - when true the deleted page is stamped with
 *				  FrozenTransactionId instead of ReadNewTransactionId(),
 *				  and the same flag is passed down to recursive calls
 *
 * Returns the number of pages successfully deleted (zero on failure; could
 * be more than one if parent blocks were deleted).
 *
 * Overall sequence, as implemented below:
 *	1. sanity-check the target, remember its block number, level, left link
 *	   and a copy of its high key, then release it;
 *	2. descend with _bt_search on the high key to obtain a stack pointing
 *	   at (or left of) the parent;
 *	3. write-lock left sibling (if any), target, right sibling, parent, and
 *	   possibly the metapage, in that order;
 *	4. inside a critical section, fix the parent downlink, the siblings'
 *	   side-links, flag the target BTP_DELETED, update the metapage, and
 *	   emit one WAL record unless the relation is temporary;
 *	5. write out all buffers and possibly recurse to the parent or to the
 *	   right sibling.
 *
 * NOTE: this leaks memory.  Rather than trying to clean up everything
 * carefully, it's better to run it in a temp context that can be reset
 * frequently.
 */
int
_bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
{
	BlockNumber target,
				leftsib,
				rightsib,
				parent;
	OffsetNumber poffset,
				maxoff;
	uint32		targetlevel,
				ilevel;
	ItemId		itemid;
	BTItem		targetkey,
				btitem;
	ScanKey		itup_scankey;
	BTStack		stack;
	Buffer		lbuf,
				rbuf,
				pbuf;
	bool		parent_half_dead;
	bool		parent_one_child;
	bool		rightsib_empty;
	/* metabuf stays InvalidBuffer unless a fast-root update is wanted */
	Buffer		metabuf = InvalidBuffer;
	Page		metapg = NULL;
	BTMetaPageData *metad = NULL;
	Page		page;
	BTPageOpaque opaque;

	/*
	 * We can never delete rightmost pages nor root pages.  While at it,
	 * check that page is not already deleted and is empty.
	 */
	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
		P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))
	{
		_bt_relbuf(rel, buf);
		return 0;
	}

	/*
	 * Save info about page, including a copy of its high key (it must
	 * have one, being non-rightmost).
	 */
	target = BufferGetBlockNumber(buf);
	targetlevel = opaque->btpo.level;
	leftsib = opaque->btpo_prev;
	itemid = PageGetItemId(page, P_HIKEY);
	targetkey = CopyBTItem((BTItem) PageGetItem(page, itemid));

	/*
	 * We need to get an approximate pointer to the page's parent page.
	 * Use the standard search mechanism to search for the page's high key;
	 * this will give us a link to either the current parent or someplace
	 * to its left (if there are multiple equal high keys).  To avoid
	 * deadlocks, we'd better drop the target page lock first.
	 */
	_bt_relbuf(rel, buf);
	/* we need a scan key to do our search, so build one */
	itup_scankey = _bt_mkscankey(rel, &(targetkey->bti_itup));
	/* find the leftmost leaf page containing this key */
	stack = _bt_search(rel, rel->rd_rel->relnatts, itup_scankey,
					   &lbuf, BT_READ);
	/* don't need a pin on that either */
	_bt_relbuf(rel, lbuf);

	/*
	 * If we are trying to delete an interior page, _bt_search did more
	 * than we needed.  Locate the stack item pointing to our parent level.
	 * (One bts_parent step is taken per level of the target, so the loop
	 * takes zero steps when targetlevel is 0.)
	 */
	ilevel = 0;
	for (;;)
	{
		if (stack == NULL)
			elog(ERROR, "_bt_pagedel: not enough stack items");
		if (ilevel == targetlevel)
			break;
		stack = stack->bts_parent;
		ilevel++;
	}

	/*
	 * We have to lock the pages we need to modify in the standard order:
	 * moving right, then up.  Else we will deadlock against other writers.
	 *
	 * So, we need to find and write-lock the current left sibling of the
	 * target page.  The sibling that was current a moment ago could have
	 * split, so we may have to move right.  This search could fail if
	 * either the sibling or the target page was deleted by someone else
	 * meanwhile; if so, give up.  (Right now, that should never happen,
	 * since page deletion is only done in VACUUM and there shouldn't be
	 * multiple VACUUMs concurrently on the same table.)
	 */
	if (leftsib != P_NONE)
	{
		lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
		page = BufferGetPage(lbuf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		while (P_ISDELETED(opaque) || opaque->btpo_next != target)
		{
			/* step right one page */
			leftsib = opaque->btpo_next;
			_bt_relbuf(rel, lbuf);
			if (leftsib == P_NONE)
			{
				/* ran off the right end; no buffers are held at this point */
				elog(LOG, "_bt_pagedel: no left sibling (concurrent deletion?)");
				return 0;
			}
			lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
			page = BufferGetPage(lbuf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		}
	}
	else
		lbuf = InvalidBuffer;	/* target is leftmost on its level */

	/*
	 * Next write-lock the target page itself.  It should be okay to take just
	 * a write lock not a superexclusive lock, since no scans would stop on an
	 * empty page.
	 */
	buf = _bt_getbuf(rel, target, BT_WRITE);
	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);

	/*
	 * Check page is still empty etc, else abandon deletion.  The empty check
	 * is necessary since someone else might have inserted into it while
	 * we didn't have it locked; the others are just for paranoia's sake.
	 */
	if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
		P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))
	{
		_bt_relbuf(rel, buf);
		if (BufferIsValid(lbuf))
			_bt_relbuf(rel, lbuf);
		return 0;
	}
	if (opaque->btpo_prev != leftsib)
		elog(ERROR, "_bt_pagedel: left link changed unexpectedly");

	/*
	 * And next write-lock the (current) right sibling.
	 */
	rightsib = opaque->btpo_next;
	rbuf = _bt_getbuf(rel, rightsib, BT_WRITE);

	/*
	 * Next find and write-lock the current parent of the target page.
	 * This is essentially the same as the corresponding step of splitting.
	 * The stack entry's item pointer is overwritten with the target block
	 * number before the call to _bt_getstackbuf.
	 */
	ItemPointerSet(&(stack->bts_btitem.bti_itup.t_tid),
				   target, P_HIKEY);
	pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);
	if (pbuf == InvalidBuffer)
		elog(ERROR, "_bt_getstackbuf: my bits moved right off the end of the world!"
			 "\n\tRecreate index %s.", RelationGetRelationName(rel));
	/* read the parent's location back out of the stack entry */
	parent = stack->bts_blkno;
	poffset = stack->bts_offset;

	/*
	 * If the target is the rightmost child of its parent, then we can't
	 * delete, unless it's also the only child --- in which case the parent
	 * changes to half-dead status.
	 */
	page = BufferGetPage(pbuf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	maxoff = PageGetMaxOffsetNumber(page);
	parent_half_dead = false;
	parent_one_child = false;
	if (poffset >= maxoff)
	{
		if (poffset == P_FIRSTDATAKEY(opaque))
			parent_half_dead = true;
		else
		{
			/* rightmost but not only child: release everything and give up */
			_bt_relbuf(rel, pbuf);
			_bt_relbuf(rel, rbuf);
			_bt_relbuf(rel, buf);
			if (BufferIsValid(lbuf))
				_bt_relbuf(rel, lbuf);
			return 0;
		}
	}
	else
	{
		/* Will there be exactly one child left in this parent? */
		if (OffsetNumberNext(P_FIRSTDATAKEY(opaque)) == maxoff)
			parent_one_child = true;
	}

	/*
	 * If we are deleting the next-to-last page on the target's level,
	 * then the rightsib is a candidate to become the new fast root.
	 * (In theory, it might be possible to push the fast root even further
	 * down, but the odds of doing so are slim, and the locking considerations
	 * daunting.)
	 *
	 * We can safely acquire a lock on the metapage here --- see comments for
	 * _bt_newroot().
	 */
	if (leftsib == P_NONE)
	{
		page = BufferGetPage(rbuf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		Assert(opaque->btpo.level == targetlevel);
		if (P_RIGHTMOST(opaque))
		{
			/* rightsib will be the only one left on the level */
			metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
			metapg = BufferGetPage(metabuf);
			metad = BTPageGetMeta(metapg);

			/*
			 * The expected case here is btm_fastlevel == targetlevel+1;
			 * if the fastlevel is <= targetlevel, something is wrong, and we
			 * choose to overwrite it to fix it.
			 */
			if (metad->btm_fastlevel > targetlevel+1)
			{
				/* no update wanted */
				_bt_relbuf(rel, metabuf);
				metabuf = InvalidBuffer;
			}
		}
	}

	/*
	 * Here we begin doing the deletion.
	 */

	/* No elog(ERROR) until changes are logged */
	START_CRIT_SECTION();

	/*
	 * Update parent.  The normal case is a tad tricky because we want to
	 * delete the target's downlink and the *following* key.  Easiest way is
	 * to copy the right sibling's downlink over the target downlink, and then
	 * delete the following item.
	 */
	page = BufferGetPage(pbuf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	if (parent_half_dead)
	{
		/* only child: drop the downlink and flag the parent half-dead */
		PageIndexTupleDelete(page, poffset);
		opaque->btpo_flags |= BTP_HALF_DEAD;
	}
	else
	{
		OffsetNumber nextoffset;

		/* redirect the target's downlink so it points at the right sibling */
		itemid = PageGetItemId(page, poffset);
		btitem = (BTItem) PageGetItem(page, itemid);
		Assert(ItemPointerGetBlockNumber(&(btitem->bti_itup.t_tid)) == target);
		ItemPointerSet(&(btitem->bti_itup.t_tid),
					   rightsib, P_HIKEY);

		nextoffset = OffsetNumberNext(poffset);
		/* This part is just for double-checking */
		itemid = PageGetItemId(page, nextoffset);
		btitem = (BTItem) PageGetItem(page, itemid);
		if (ItemPointerGetBlockNumber(&(btitem->bti_itup.t_tid)) != rightsib)
			elog(PANIC, "_bt_pagedel: right sibling is not next child");

		PageIndexTupleDelete(page, nextoffset);
	}

	/*
	 * Update siblings' side-links.  Note the target page's side-links will
	 * continue to point to the siblings.
	 */
	if (BufferIsValid(lbuf))
	{
		page = BufferGetPage(lbuf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		Assert(opaque->btpo_next == target);
		opaque->btpo_next = rightsib;
	}
	page = BufferGetPage(rbuf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	Assert(opaque->btpo_prev == target);
	opaque->btpo_prev = leftsib;
	/* remember whether the right sibling is empty; used for recursion below */
	rightsib_empty = (P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page));

	/*
	 * Mark the page itself deleted.  It can be recycled when all current
	 * transactions are gone; or immediately if we're doing VACUUM FULL.
	 */
	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	opaque->btpo_flags |= BTP_DELETED;
	opaque->btpo.xact =
		vacuum_full ? FrozenTransactionId : ReadNewTransactionId();

	/* And update the metapage, if needed */
	if (BufferIsValid(metabuf))
	{
		metad->btm_fastroot = rightsib;
		metad->btm_fastlevel = targetlevel;
	}

	/* XLOG stuff */
	if (!rel->rd_istemp)
	{
		xl_btree_delete_page xlrec;
		xl_btree_metadata xlmeta;
		uint8		xlinfo;
		XLogRecPtr	recptr;
		/* up to 5 chain entries: xlrec, optional meta, parent, right, optional left */
		XLogRecData rdata[5];
		XLogRecData *nextrdata;

		xlrec.target.node = rel->rd_node;
		ItemPointerSet(&(xlrec.target.tid), parent, poffset);
		xlrec.deadblk = target;
		xlrec.leftblk = leftsib;
		xlrec.rightblk = rightsib;

		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfBtreeDeletePage;
		rdata[0].next = nextrdata = &(rdata[1]);

		if (BufferIsValid(metabuf))
		{
			/* append the (already updated) metapage fields to the record */
			xlmeta.root = metad->btm_root;
			xlmeta.level = metad->btm_level;
			xlmeta.fastroot = metad->btm_fastroot;
			xlmeta.fastlevel = metad->btm_fastlevel;

			nextrdata->buffer = InvalidBuffer;
			nextrdata->data = (char *) &xlmeta;
			nextrdata->len = sizeof(xl_btree_metadata);
			nextrdata->next = nextrdata + 1;
			nextrdata++;
			xlinfo = XLOG_BTREE_DELETE_PAGE_META;
		}
		else
			xlinfo = XLOG_BTREE_DELETE_PAGE;

		/* buffer-only entries (no data): parent, then right sibling */
		nextrdata->buffer = pbuf;
		nextrdata->data = NULL;
		nextrdata->len = 0;
		nextrdata->next = nextrdata + 1;
		nextrdata++;

		nextrdata->buffer = rbuf;
		nextrdata->data = NULL;
		nextrdata->len = 0;
		nextrdata->next = NULL;

		if (BufferIsValid(lbuf))
		{
			/* extend the chain with the left sibling as the last entry */
			nextrdata->next = nextrdata + 1;
			nextrdata++;
			nextrdata->buffer = lbuf;
			nextrdata->data = NULL;
			nextrdata->len = 0;
			nextrdata->next = NULL;
		}

		recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);

		/* stamp every modified page with the new record's LSN */
		if (BufferIsValid(metabuf))
		{
			PageSetLSN(metapg, recptr);
			PageSetSUI(metapg, ThisStartUpID);
		}
		page = BufferGetPage(pbuf);
		PageSetLSN(page, recptr);
		PageSetSUI(page, ThisStartUpID);
		page = BufferGetPage(rbuf);
		PageSetLSN(page, recptr);
		PageSetSUI(page, ThisStartUpID);
		page = BufferGetPage(buf);
		PageSetLSN(page, recptr);
		PageSetSUI(page, ThisStartUpID);
		if (BufferIsValid(lbuf))
		{
			page = BufferGetPage(lbuf);
			PageSetLSN(page, recptr);
			PageSetSUI(page, ThisStartUpID);
		}
	}

	END_CRIT_SECTION();

	/* Write and release buffers */
	if (BufferIsValid(metabuf))
		_bt_wrtbuf(rel, metabuf);
	_bt_wrtbuf(rel, pbuf);
	_bt_wrtbuf(rel, rbuf);
	_bt_wrtbuf(rel, buf);
	if (BufferIsValid(lbuf))
		_bt_wrtbuf(rel, lbuf);

	/*
	 * If parent became half dead, recurse to try to delete it.  Otherwise,
	 * if right sibling is empty and is now the last child of the parent,
	 * recurse to try to delete it.  (These cases cannot apply at the same
	 * time, though the second case might itself recurse to the first.)
	 *
	 * In both cases the page is re-read with BT_READ, matching this
	 * function's entry requirement, and 1 is added for the page deleted here.
	 */
	if (parent_half_dead)
	{
		buf = _bt_getbuf(rel, parent, BT_READ);
		return _bt_pagedel(rel, buf, vacuum_full) + 1;
	}
	if (parent_one_child && rightsib_empty)
	{
		buf = _bt_getbuf(rel, rightsib, BT_READ);
		return _bt_pagedel(rel, buf, vacuum_full) + 1;
	}

	return 1;
}
src/backend/access/nbtree/nbtree.c
浏览文件 @
88dc31e3
...
...
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.9
6 2003/02/22 00:45:04
tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.9
7 2003/02/23 06:17:13
tgl Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -710,15 +710,16 @@ Datum
btvacuumcleanup
(
PG_FUNCTION_ARGS
)
{
Relation
rel
=
(
Relation
)
PG_GETARG_POINTER
(
0
);
#ifdef NOT_USED
IndexVacuumCleanupInfo
*
info
=
(
IndexVacuumCleanupInfo
*
)
PG_GETARG_POINTER
(
1
);
#endif
IndexBulkDeleteResult
*
stats
=
(
IndexBulkDeleteResult
*
)
PG_GETARG_POINTER
(
2
);
BlockNumber
num_pages
;
BlockNumber
blkno
;
PageFreeSpaceInfo
*
pageSpaces
;
int
nFreePages
,
maxFreePages
;
BlockNumber
pages_deleted
=
0
;
MemoryContext
mycontext
;
MemoryContext
oldcontext
;
Assert
(
stats
!=
NULL
);
...
...
@@ -731,6 +732,13 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
pageSpaces
=
(
PageFreeSpaceInfo
*
)
palloc
(
maxFreePages
*
sizeof
(
PageFreeSpaceInfo
));
nFreePages
=
0
;
/* Create a temporary memory context to run _bt_pagedel in */
mycontext
=
AllocSetContextCreate
(
CurrentMemoryContext
,
"_bt_pagedel"
,
ALLOCSET_DEFAULT_MINSIZE
,
ALLOCSET_DEFAULT_INITSIZE
,
ALLOCSET_DEFAULT_MAXSIZE
);
/*
* Scan through all pages of index, except metapage. (Any pages added
* after we start the scan will not be examined; this should be fine,
...
...
@@ -745,17 +753,53 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
buf
=
_bt_getbuf
(
rel
,
blkno
,
BT_READ
);
page
=
BufferGetPage
(
buf
);
opaque
=
(
BTPageOpaque
)
PageGetSpecialPointer
(
page
);
if
(
P_ISDELETED
(
opaqu
e
))
if
(
_bt_page_recyclable
(
pag
e
))
{
/*
XXX if safe-to-reclaim...
*/
/*
Okay to recycle this page
*/
if
(
nFreePages
<
maxFreePages
)
{
pageSpaces
[
nFreePages
].
blkno
=
blkno
;
/*
The avail-space value is bogus, but
must be < BLCKSZ */
/*
claimed avail-space
must be < BLCKSZ */
pageSpaces
[
nFreePages
].
avail
=
BLCKSZ
-
1
;
nFreePages
++
;
}
}
else
if
((
opaque
->
btpo_flags
&
BTP_HALF_DEAD
)
||
P_FIRSTDATAKEY
(
opaque
)
>
PageGetMaxOffsetNumber
(
page
))
{
/* Empty, try to delete */
int
ndel
;
/* Run pagedel in a temp context to avoid memory leakage */
MemoryContextReset
(
mycontext
);
oldcontext
=
MemoryContextSwitchTo
(
mycontext
);
ndel
=
_bt_pagedel
(
rel
,
buf
,
info
->
vacuum_full
);
pages_deleted
+=
ndel
;
/*
* During VACUUM FULL it's okay to recycle deleted pages
* immediately, since there can be no other transactions
* scanning the index. Note that we will only recycle the
* current page and not any parent pages that _bt_pagedel
* might have recursed to; this seems reasonable in the name
* of simplicity. (Trying to do otherwise would mean we'd
* have to sort the list of recyclable pages we're building.)
*/
if
(
ndel
&&
info
->
vacuum_full
)
{
if
(
nFreePages
<
maxFreePages
)
{
pageSpaces
[
nFreePages
].
blkno
=
blkno
;
/* claimed avail-space must be < BLCKSZ */
pageSpaces
[
nFreePages
].
avail
=
BLCKSZ
-
1
;
nFreePages
++
;
}
}
MemoryContextSwitchTo
(
oldcontext
);
continue
;
/* pagedel released buffer */
}
_bt_relbuf
(
rel
,
buf
);
}
...
...
@@ -768,6 +812,13 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
pfree
(
pageSpaces
);
MemoryContextDelete
(
mycontext
);
if
(
pages_deleted
>
0
)
elog
(
info
->
message_level
,
"Index %s: %u pages, deleted %u; %u now free"
,
RelationGetRelationName
(
rel
),
num_pages
,
pages_deleted
,
nFreePages
);
/* update statistics */
stats
->
num_pages
=
num_pages
;
stats
->
pages_free
=
nFreePages
;
...
...
src/backend/access/nbtree/nbtxlog.c
浏览文件 @
88dc31e3
...
...
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.
1 2003/02/21 00:06:21
tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.
2 2003/02/23 06:17:13
tgl Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -403,6 +403,171 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
UnlockAndWriteBuffer
(
buffer
);
}
/*
 * btree_xlog_delete_page() -- replay (or attempt to undo) a btree page
 * deletion WAL record.
 *
 *	redo	- true when replaying the record, false when undoing it
 *	ismeta	- true if an xl_btree_metadata struct follows the fixed-size
 *			  xl_btree_delete_page part of the record; it is then used to
 *			  restore the metapage (redo only)
 *	lsn		- LSN of the record; pages already at or past it are left alone
 *	record	- the WAL record itself
 *
 * In redo mode the parent, the right sibling and the left sibling (if any)
 * are each patched up, but only when the corresponding XLR_BKP_BLOCK_n bit
 * is not set in xl_info (presumably a set bit means the page was restored
 * from a backup image --- confirm against the xlog code).  The target page
 * is always re-initialized as an empty page flagged BTP_DELETED.
 *
 * Undo is not implemented: after locating the target page the undo path
 * always ends in elog(PANIC).
 */
static void
btree_xlog_delete_page(bool redo, bool ismeta,
					   XLogRecPtr lsn, XLogRecord *record)
{
	xl_btree_delete_page *xlrec = (xl_btree_delete_page *) XLogRecGetData(record);
	Relation	reln;
	BlockNumber parent;
	BlockNumber target;
	BlockNumber leftsib;
	BlockNumber rightsib;
	Buffer		buffer;
	Page		page;
	BTPageOpaque pageop;
	/* only ever points at string literals, hence const */
	const char *op = (redo) ? "redo" : "undo";

	reln = XLogOpenRelation(redo, RM_BTREE_ID, xlrec->target.node);
	if (!RelationIsValid(reln))
		return;

	parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
	target = xlrec->deadblk;
	leftsib = xlrec->leftblk;
	rightsib = xlrec->rightblk;

	/* parent page */
	if (redo && !(record->xl_info & XLR_BKP_BLOCK_1))
	{
		buffer = XLogReadBuffer(false, reln, parent);
		if (!BufferIsValid(buffer))
			elog(PANIC, "btree_delete_page_redo: parent block unfound");
		page = (Page) BufferGetPage(buffer);
		pageop = (BTPageOpaque) PageGetSpecialPointer(page);
		if (PageIsNew((PageHeader) page))
			elog(PANIC, "btree_delete_page_redo: uninitialized parent page");
		if (XLByteLE(lsn, PageGetLSN(page)))
		{
			/* page already reflects this record; nothing to do */
			UnlockAndReleaseBuffer(buffer);
		}
		else
		{
			OffsetNumber poffset;

			poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
			if (poffset >= PageGetMaxOffsetNumber(page))
			{
				/* target was the only child: parent becomes half-dead */
				Assert(poffset == P_FIRSTDATAKEY(pageop));
				PageIndexTupleDelete(page, poffset);
				pageop->btpo_flags |= BTP_HALF_DEAD;
			}
			else
			{
				ItemId		itemid;
				BTItem		btitem;
				OffsetNumber nextoffset;

				/*
				 * Point the target's downlink at the right sibling, then
				 * remove the following item.
				 */
				itemid = PageGetItemId(page, poffset);
				btitem = (BTItem) PageGetItem(page, itemid);
				ItemPointerSet(&(btitem->bti_itup.t_tid),
							   rightsib, P_HIKEY);
				nextoffset = OffsetNumberNext(poffset);
				PageIndexTupleDelete(page, nextoffset);
			}

			PageSetLSN(page, lsn);
			PageSetSUI(page, ThisStartUpID);
			UnlockAndWriteBuffer(buffer);
		}
	}

	/* Fix left-link of right sibling */
	if (redo && !(record->xl_info & XLR_BKP_BLOCK_2))
	{
		buffer = XLogReadBuffer(false, reln, rightsib);
		if (!BufferIsValid(buffer))
			elog(PANIC, "btree_delete_page_redo: lost right sibling");
		page = (Page) BufferGetPage(buffer);
		if (PageIsNew((PageHeader) page))
			elog(PANIC, "btree_delete_page_redo: uninitialized right sibling");
		if (XLByteLE(lsn, PageGetLSN(page)))
		{
			UnlockAndReleaseBuffer(buffer);
		}
		else
		{
			pageop = (BTPageOpaque) PageGetSpecialPointer(page);
			pageop->btpo_prev = leftsib;

			PageSetLSN(page, lsn);
			PageSetSUI(page, ThisStartUpID);
			UnlockAndWriteBuffer(buffer);
		}
	}

	/* Fix right-link of left sibling, if any */
	if (redo && !(record->xl_info & XLR_BKP_BLOCK_3))
	{
		if (leftsib != P_NONE)
		{
			buffer = XLogReadBuffer(false, reln, leftsib);
			if (!BufferIsValid(buffer))
				elog(PANIC, "btree_delete_page_redo: lost left sibling");
			page = (Page) BufferGetPage(buffer);
			if (PageIsNew((PageHeader) page))
				elog(PANIC, "btree_delete_page_redo: uninitialized left sibling");
			if (XLByteLE(lsn, PageGetLSN(page)))
			{
				UnlockAndReleaseBuffer(buffer);
			}
			else
			{
				pageop = (BTPageOpaque) PageGetSpecialPointer(page);
				pageop->btpo_next = rightsib;

				PageSetLSN(page, lsn);
				PageSetSUI(page, ThisStartUpID);
				UnlockAndWriteBuffer(buffer);
			}
		}
	}

	/* Rewrite target page as empty deleted page */
	buffer = XLogReadBuffer(false, reln, target);
	if (!BufferIsValid(buffer))
		elog(PANIC, "btree_delete_page_%s: lost target page", op);
	page = (Page) BufferGetPage(buffer);
	if (redo)
		_bt_pageinit(page, BufferGetPageSize(buffer));
	else if (PageIsNew((PageHeader) page))
		elog(PANIC, "btree_delete_page_undo: uninitialized target page");
	pageop = (BTPageOpaque) PageGetSpecialPointer(page);

	if (redo)
	{
		/* keep the side-links so the freshly initialized page still has them */
		pageop->btpo_prev = leftsib;
		pageop->btpo_next = rightsib;
		pageop->btpo.xact = FrozenTransactionId;
		pageop->btpo_flags = BTP_DELETED;

		PageSetLSN(page, lsn);
		PageSetSUI(page, ThisStartUpID);
		UnlockAndWriteBuffer(buffer);
	}
	else
	{
		/* undo */
		/* "page" is the target page here, so the message names it as such */
		if (XLByteLT(PageGetLSN(page), lsn))
			elog(PANIC, "btree_delete_page_undo: bad target page LSN");
		elog(PANIC, "btree_delete_page_undo: unimplemented");
	}

	/* Update metapage if needed */
	if (redo)					/* metapage changes not undoable */
	{
		if (ismeta)
		{
			xl_btree_metadata md;

			/* metadata sits right after the fixed-size part of the record */
			memcpy(&md, (char *) xlrec + SizeOfBtreeDeletePage,
				   sizeof(xl_btree_metadata));
			_bt_restore_meta(reln, lsn,
							 md.root, md.level,
							 md.fastroot, md.fastlevel);
		}
	}
}
static
void
btree_xlog_newroot
(
bool
redo
,
XLogRecPtr
lsn
,
XLogRecord
*
record
)
{
...
...
@@ -534,8 +699,10 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
btree_xlog_delete
(
true
,
lsn
,
record
);
break
;
case
XLOG_BTREE_DELETE_PAGE
:
btree_xlog_delete_page
(
true
,
false
,
lsn
,
record
);
break
;
case
XLOG_BTREE_DELETE_PAGE_META
:
// ???
btree_xlog_delete_page
(
true
,
true
,
lsn
,
record
);
break
;
case
XLOG_BTREE_NEWROOT
:
btree_xlog_newroot
(
true
,
lsn
,
record
);
...
...
@@ -583,8 +750,10 @@ btree_undo(XLogRecPtr lsn, XLogRecord *record)
btree_xlog_delete
(
false
,
lsn
,
record
);
break
;
case
XLOG_BTREE_DELETE_PAGE
:
btree_xlog_delete_page
(
false
,
false
,
lsn
,
record
);
break
;
case
XLOG_BTREE_DELETE_PAGE_META
:
// ???
btree_xlog_delete_page
(
false
,
true
,
lsn
,
record
);
break
;
case
XLOG_BTREE_NEWROOT
:
btree_xlog_newroot
(
false
,
lsn
,
record
);
...
...
src/backend/storage/freespace/freespace.c
浏览文件 @
88dc31e3
...
...
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.1
4 2002/09/20 19:56:01
tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.1
5 2003/02/23 06:17:13
tgl Exp $
*
*
* NOTES:
...
...
@@ -681,7 +681,9 @@ free_chunk_chain(FSMChunk *fchunk)
* Look to see if a page with at least the specified amount of space is
* available in the given FSMRelation. If so, return its page number,
* and advance the nextPage counter so that the next inquiry will return
* a different page if possible. Return InvalidBlockNumber if no success.
* a different page if possible; also update the entry to show that the
* requested space is not available anymore. Return InvalidBlockNumber
* if no success.
*/
static
BlockNumber
find_free_space
(
FSMRelation
*
fsmrel
,
Size
spaceNeeded
)
...
...
@@ -713,6 +715,12 @@ find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
/* Check the next page */
if
((
Size
)
curChunk
->
bytes
[
chunkRelIndex
]
>=
spaceNeeded
)
{
/*
* Found what we want --- adjust the entry. In theory we could
* delete the entry immediately if it drops below threshold,
* but it seems better to wait till we next need space.
*/
curChunk
->
bytes
[
chunkRelIndex
]
-=
(
ItemLength
)
spaceNeeded
;
fsmrel
->
nextPage
=
pageIndex
+
1
;
return
curChunk
->
pages
[
chunkRelIndex
];
}
...
...
src/include/access/itup.h
浏览文件 @
88dc31e3
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: itup.h,v 1.3
6 2002/08/25 17:20:01
tgl Exp $
* $Id: itup.h,v 1.3
7 2003/02/23 06:17:13
tgl Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -133,11 +133,11 @@ typedef InsertIndexResultData *InsertIndexResult;
)
/*
indextuple.h
*/
/*
routines in indextuple.c
*/
extern
IndexTuple
index_formtuple
(
TupleDesc
tupleDescriptor
,
Datum
*
value
,
char
*
null
);
extern
Datum
nocache_index_getattr
(
IndexTuple
tup
,
int
attnum
,
TupleDesc
tupleDesc
,
bool
*
isnull
);
extern
void
CopyIndexTuple
(
IndexTuple
source
,
IndexTuple
*
target
);
extern
IndexTuple
CopyIndexTuple
(
IndexTuple
source
);
#endif
/* ITUP_H */
src/include/access/nbtree.h
浏览文件 @
88dc31e3
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: nbtree.h,v 1.6
5 2003/02/22 00:45:05
tgl Exp $
* $Id: nbtree.h,v 1.6
6 2003/02/23 06:17:13
tgl Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -118,6 +118,8 @@ typedef struct BTItemData
typedef
BTItemData
*
BTItem
;
/*
 * CopyBTItem -- duplicate a BTItem by handing it to CopyIndexTuple.
 * The argument is cast to IndexTuple for the call and the result is cast
 * back to BTItem.
 */
#define CopyBTItem(btitem) ((BTItem) CopyIndexTuple((IndexTuple) (btitem)))
/*
* For XLOG: size without alignment. Sizeof works as long as
* IndexTupleData has exactly 8 bytes.
...
...
@@ -434,6 +436,7 @@ extern Datum btvacuumcleanup(PG_FUNCTION_ARGS);
*/
extern
InsertIndexResult
_bt_doinsert
(
Relation
rel
,
BTItem
btitem
,
bool
index_is_unique
,
Relation
heapRel
);
extern
Buffer
_bt_getstackbuf
(
Relation
rel
,
BTStack
stack
,
int
access
);
extern
void
_bt_insert_parent
(
Relation
rel
,
Buffer
buf
,
Buffer
rbuf
,
BTStack
stack
,
bool
is_root
,
bool
is_only
);
...
...
@@ -448,8 +451,10 @@ extern void _bt_relbuf(Relation rel, Buffer buf);
extern
void
_bt_wrtbuf
(
Relation
rel
,
Buffer
buf
);
extern
void
_bt_wrtnorelbuf
(
Relation
rel
,
Buffer
buf
);
extern
void
_bt_pageinit
(
Page
page
,
Size
size
);
extern
bool
_bt_page_recyclable
(
Page
page
);
extern
void
_bt_metaproot
(
Relation
rel
,
BlockNumber
rootbknum
,
uint32
level
);
extern
void
_bt_itemdel
(
Relation
rel
,
Buffer
buf
,
ItemPointer
tid
);
extern
int
_bt_pagedel
(
Relation
rel
,
Buffer
buf
,
bool
vacuum_full
);
/*
* prototypes for functions in nbtsearch.c
...
...
@@ -488,7 +493,6 @@ extern BTItem _bt_formitem(IndexTuple itup);
/*
* prototypes for functions in nbtsort.c
*/
typedef
struct
BTSpool
BTSpool
;
/* opaque type known only within nbtsort.c */
extern
BTSpool
*
_bt_spoolinit
(
Relation
index
,
bool
isunique
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录