未验证 提交 4fa61b9e 编写于 作者: W wade zhang 提交者: GitHub

Merge pull request #21699 from taosdata/fix/TD-24717

enh(tdb/recycle): first round implemenation of page recycling
......@@ -233,7 +233,11 @@ int tdbBtreeDelete(SBTree *pBt, const void *pKey, int kLen, TXN *pTxn) {
int ret;
tdbBtcOpen(&btc, pBt, pTxn);
/*
btc.coder.ofps = taosArrayInit(8, sizeof(SPage *));
// btc.coder.ofps = taosArrayInit(8, sizeof(SPgno));
//pBtc->coder.ofps = taosArrayInit(8, sizeof(SPage *));
*/
tdbTrace("tdb delete, btc: %p, pTxn: %p", &btc, pTxn);
// move the cursor
......@@ -254,7 +258,18 @@ int tdbBtreeDelete(SBTree *pBt, const void *pKey, int kLen, TXN *pTxn) {
tdbBtcClose(&btc);
return -1;
}
/*
SArray *ofps = btc.coder.ofps;
if (ofps) {
for (int i = 0; i < TARRAY_SIZE(ofps); ++i) {
SPage *ofp = *(SPage **)taosArrayGet(ofps, i);
tdbPagerInsertFreePage(btc.pBt->pPager, ofp, btc.pTxn);
}
taosArrayDestroy(ofps);
btc.coder.ofps = NULL;
}
*/
tdbBtcClose(&btc);
return 0;
}
......@@ -563,6 +578,7 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx
}
}
// copy the parent key out if child pages are not leaf page
// childNotLeaf = !(TDB_BTREE_PAGE_IS_LEAF(pOlds[0]) || TDB_BTREE_PAGE_IS_OVFL(pOlds[0]));
childNotLeaf = !TDB_BTREE_PAGE_IS_LEAF(pOlds[0]);
if (childNotLeaf) {
for (int i = 0; i < nOlds; i++) {
......@@ -592,7 +608,30 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx
for (int i = 0; i < nOlds; i++) {
nCells = TDB_PAGE_TOTAL_CELLS(pParent);
if (sIdx < nCells) {
bool destroyOfps = false;
if (!childNotLeaf) {
if (!pParent->pPager->ofps) {
pParent->pPager->ofps = taosArrayInit(8, sizeof(SPage *));
destroyOfps = true;
}
}
tdbPageDropCell(pParent, sIdx, pTxn, pBt);
if (!childNotLeaf) {
SArray *ofps = pParent->pPager->ofps;
if (ofps) {
for (int i = 0; i < TARRAY_SIZE(ofps); ++i) {
SPage *ofp = *(SPage **)taosArrayGet(ofps, i);
tdbPagerInsertFreePage(pParent->pPager, ofp, pTxn);
}
if (destroyOfps) {
taosArrayDestroy(ofps);
pParent->pPager->ofps = NULL;
}
}
}
} else {
((SIntHdr *)pParent->pData)->pgno = 0;
}
......@@ -861,6 +900,8 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx
if (!TDB_BTREE_PAGE_IS_LEAF(pNews[0])) {
((SIntHdr *)(pParent->pData))->pgno = ((SIntHdr *)(pNews[0]->pData))->pgno;
}
tdbPagerInsertFreePage(pBt->pPager, pNews[0], pTxn);
}
for (int i = 0; i < 3; i++) {
......@@ -870,6 +911,9 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx
}
for (pageIdx = 0; pageIdx < nOlds; ++pageIdx) {
if (pageIdx >= nNews) {
tdbPagerInsertFreePage(pBt->pPager, pOlds[pageIdx], pTxn);
}
tdbPagerReturnPage(pBt->pPager, pOlds[pageIdx], pTxn);
}
for (; pageIdx < nNews; ++pageIdx) {
......@@ -1311,7 +1355,11 @@ static int tdbBtreeDecodePayload(SPage *pPage, const SCell *pCell, int nHeader,
if (ret < 0) {
return -1;
}
/*
if (pDecoder->ofps) {
taosArrayPush(pDecoder->ofps, &ofp);
}
*/
ofpCell = tdbPageGetCell(ofp, 0);
if (nLeft <= ofp->maxLocal - sizeof(SPgno)) {
......@@ -1346,11 +1394,17 @@ static int tdbBtreeDecodePayload(SPage *pPage, const SCell *pCell, int nHeader,
int lastKeyPageSpace = 0;
// load left key & val to ovpages
while (pgno != 0) {
tdbTrace("tdb decode-ofp, pTxn: %p, pgno:%u by cell:%p", pTxn, pgno, pCell);
// printf("tdb decode-ofp, pTxn: %p, pgno:%u by cell:%p\n", pTxn, pgno, pCell);
ret = tdbLoadOvflPage(&pgno, &ofp, pTxn, pBt);
if (ret < 0) {
return -1;
}
/*
if (pDecoder->ofps) {
taosArrayPush(pDecoder->ofps, &ofp);
}
*/
ofpCell = tdbPageGetCell(ofp, 0);
int lastKeyPage = 0;
......@@ -1518,8 +1572,8 @@ static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell, int dropOfp, TXN *
if (pPage->vLen == TDB_VARIANT_LEN) {
if (!leaf) {
tdbError("tdb/btree-cell-size: not a leaf page.");
return -1;
tdbError("tdb/btree-cell-size: not a leaf page:%p, pgno:%" PRIu32 ".", pPage, TDB_PAGE_PGNO(pPage));
// return -1;
}
nHeader += tdbGetVarInt(pCell + nHeader, &vLen);
} else if (leaf) {
......@@ -1559,8 +1613,27 @@ static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell, int dropOfp, TXN *
bytes = ofp->maxLocal - sizeof(SPgno);
}
// SPgno origPgno = pgno;
memcpy(&pgno, ofpCell + bytes, sizeof(pgno));
ret = tdbPagerWrite(pBt->pPager, ofp);
if (ret < 0) {
tdbError("failed to write page since %s", terrstr());
return -1;
}
/*
tdbPageDropCell(ofp, 0, pTxn, pBt);
*/
// SIntHdr *pIntHdr = (SIntHdr *)(ofp->pData);
// pIntHdr->flags = TDB_FLAG_ADD(0, TDB_BTREE_OVFL);
// pIntHdr->pgno = 0;
// ofp->pPager = NULL;
SArray *ofps = pPage->pPager->ofps;
if (ofps) {
taosArrayPush(ofps, &ofp);
}
tdbPagerReturnPage(pPage->pPager, ofp, pTxn);
nLeft -= bytes;
......@@ -1980,6 +2053,11 @@ static int tdbBtcMoveDownward(SBTC *pBtc) {
return -1;
}
if (TDB_BTREE_PAGE_IS_OVFL(pBtc->pPage)) {
tdbError("tdb/btc-move-downward: should not be a ovfl page here.");
return -1;
}
if (pBtc->idx < TDB_PAGE_TOTAL_CELLS(pBtc->pPage)) {
pCell = tdbPageGetCell(pBtc->pPage, pBtc->idx);
pgno = ((SPgno *)pCell)[0];
......@@ -2068,8 +2146,27 @@ int tdbBtcDelete(SBTC *pBtc) {
return -1;
}
bool destroyOfps = false;
if (!pBtc->pPage->pPager->ofps) {
pBtc->pPage->pPager->ofps = taosArrayInit(8, sizeof(SPage *));
destroyOfps = true;
}
tdbPageDropCell(pBtc->pPage, idx, pBtc->pTxn, pBtc->pBt);
SArray *ofps = pBtc->pPage->pPager->ofps;
if (ofps) {
for (int i = 0; i < TARRAY_SIZE(ofps); ++i) {
SPage *ofp = *(SPage **)taosArrayGet(ofps, i);
tdbPagerInsertFreePage(pBtc->pPage->pPager, ofp, pBtc->pTxn);
}
if (destroyOfps) {
taosArrayDestroy(ofps);
pBtc->pPage->pPager->ofps = NULL;
}
}
// update interior page or do balance
if (idx == nCells - 1) {
if (idx) {
......@@ -2113,6 +2210,8 @@ int tdbBtcDelete(SBTC *pBtc) {
return -1;
}
// printf("tdb/btc-delete: btree balance delete pgno: %d.\n", TDB_PAGE_PGNO(pBtc->pPage));
ret = tdbBtreeBalance(pBtc);
if (ret < 0) {
tdbError("tdb/btc-delete: btree balance failed with ret: %d.", ret);
......@@ -2181,7 +2280,13 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int
tdbError("tdb/btc-upsert: page insert/update cell failed with ret: %d.", ret);
return -1;
}
/*
bool destroyOfps = false;
if (!pBtc->pPage->pPager->ofps) {
pBtc->pPage->pPager->ofps = taosArrayInit(8, sizeof(SPage *));
destroyOfps = true;
}
*/
// check balance
if (pBtc->pPage->nOverflow > 0) {
ret = tdbBtreeBalance(pBtc);
......@@ -2190,7 +2295,20 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int
return -1;
}
}
/*
SArray *ofps = pBtc->pPage->pPager->ofps;
if (ofps) {
for (int i = 0; i < TARRAY_SIZE(ofps); ++i) {
SPage *ofp = *(SPage **)taosArrayGet(ofps, i);
tdbPagerInsertFreePage(pBtc->pPage->pPager, ofp, pBtc->pTxn);
}
if (destroyOfps) {
taosArrayDestroy(ofps);
pBtc->pPage->pPager->ofps = NULL;
}
}
*/
return 0;
}
......
......@@ -70,6 +70,11 @@ int32_t tdbOpen(const char *dbname, int32_t szPage, int32_t pages, TDB **ppDb, i
if (ret < 0) {
return -1;
}
ret = tdbTbOpen(TDB_FREEDB_NAME, sizeof(SPgno), 0, NULL, pDb, &pDb->pFreeDb, rollback);
if (ret < 0) {
return -1;
}
#endif
*ppDb = pDb;
......@@ -82,6 +87,7 @@ int tdbClose(TDB *pDb) {
if (pDb) {
#ifdef USE_MAINDB
if (pDb->pMainDb) tdbTbClose(pDb->pMainDb);
if (pDb->pFreeDb) tdbTbClose(pDb->pFreeDb);
#endif
for (pPager = pDb->pgrList; pPager; pPager = pDb->pgrList) {
......
......@@ -292,7 +292,23 @@ int tdbPagerBegin(SPager *pPager, TXN *pTxn) {
*/
return 0;
}
/*
int tdbPagerCancelDirty(SPager *pPager, SPage *pPage, TXN *pTxn) {
SRBTreeNode *pNode = tRBTreeGet(&pPager->rbt, (SRBTreeNode *)pPage);
if (pNode) {
pPage->isDirty = 0;
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
if (pTxn->jPageSet) {
hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
}
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
}
return 0;
}
*/
int tdbPagerCommit(SPager *pPager, TXN *pTxn) {
SPage *pPage;
int ret;
......@@ -338,10 +354,13 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) {
if (pTxn->jPageSet) {
hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
}
tdbTrace("tdb/pager-commit: remove page: %p %d from dirty tree: %p", pPage, TDB_PAGE_PGNO(pPage), &pPager->rbt);
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
}
tdbTrace("pager/commit reset dirty tree: %p", &pPager->rbt);
tdbTrace("tdb/pager-commit reset dirty tree: %p", &pPager->rbt);
tRBTreeCreate(&pPager->rbt, pageCmpFn);
// sync the db file
......@@ -629,6 +648,8 @@ int tdbPagerFlushPage(SPager *pPager, TXN *pTxn) {
return 0;
}
static int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno, TXN *pTxn);
int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPage)(SPage *, void *, int), void *arg,
TXN *pTxn) {
SPage *pPage;
......@@ -643,7 +664,7 @@ int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPa
// alloc new page
if (pgno == 0) {
loadPage = 0;
ret = tdbPagerAllocPage(pPager, &pgno);
ret = tdbPagerAllocPage(pPager, &pgno, pTxn);
if (ret < 0) {
tdbError("tdb/pager: %p, ret: %d pgno: %" PRIu32 ", alloc page failed.", pPager, ret, pgno);
return -1;
......@@ -695,23 +716,86 @@ void tdbPagerReturnPage(SPager *pPager, SPage *pPage, TXN *pTxn) {
// TDB_PAGE_PGNO(pPage), pPage);
}
static int tdbPagerAllocFreePage(SPager *pPager, SPgno *ppgno) {
// TODO: Allocate a page from the free list
int tdbPagerInsertFreePage(SPager *pPager, SPage *pPage, TXN *pTxn) {
int code = 0;
SPgno pgno = TDB_PAGE_PGNO(pPage);
// memset(pPage->pData, 0, pPage->pageSize);
tdbTrace("tdb/insert-free-page: tbc recycle page: %d.", pgno);
// printf("tdb/insert-free-page: tbc recycle page: %d.\n", pgno);
code = tdbTbInsert(pPager->pEnv->pFreeDb, &pgno, sizeof(pgno), NULL, 0, pTxn);
if (code < 0) {
tdbError("tdb/insert-free-page: tb insert failed with ret: %d.", code);
return -1;
}
pPage->pPager = NULL;
return code;
}
static int tdbPagerRemoveFreePage(SPager *pPager, SPgno *pPgno, TXN *pTxn) {
int code = 0;
TBC *pCur;
if (!pPager->pEnv->pFreeDb) {
return 0;
}
code = tdbTbcOpen(pPager->pEnv->pFreeDb, &pCur, pTxn);
if (code < 0) {
return 0;
}
code = tdbTbcMoveToFirst(pCur);
if (code) {
tdbError("tdb/remove-free-page: moveto first failed with ret: %d.", code);
tdbTbcClose(pCur);
return 0;
}
void *pKey = NULL;
int nKey = 0;
code = tdbTbcGet(pCur, (const void **)&pKey, &nKey, NULL, NULL);
if (code < 0) {
// tdbError("tdb/remove-free-page: tbc get failed with ret: %d.", code);
tdbTbcClose(pCur);
return 0;
}
*pPgno = *(SPgno *)pKey;
tdbTrace("tdb/remove-free-page: tbc get page: %d.", *pPgno);
// printf("tdb/remove-free-page: tbc get page: %d.\n", *pPgno);
code = tdbTbcDelete(pCur);
if (code < 0) {
tdbError("tdb/remove-free-page: tbc delete failed with ret: %d.", code);
tdbTbcClose(pCur);
return 0;
}
tdbTbcClose(pCur);
return 0;
}
static int tdbPagerAllocFreePage(SPager *pPager, SPgno *ppgno, TXN *pTxn) {
// Allocate a page from the free list
return tdbPagerRemoveFreePage(pPager, ppgno, pTxn);
}
static int tdbPagerAllocNewPage(SPager *pPager, SPgno *ppgno) {
*ppgno = ++pPager->dbFileSize;
// tdbError("tdb/alloc-new-page: %d.", *ppgno);
return 0;
}
int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno) {
static int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno, TXN *pTxn) {
int ret;
*ppgno = 0;
// Try to allocate from the free list of the pager
ret = tdbPagerAllocFreePage(pPager, ppgno);
ret = tdbPagerAllocFreePage(pPager, ppgno, pTxn);
if (ret < 0) {
return -1;
}
......
......@@ -131,13 +131,14 @@ typedef struct SBtInfo {
#define TDB_CELLDECODER_FREE_VAL(pCellDecoder) ((pCellDecoder)->freeKV & TDB_CELLD_F_VAL)
typedef struct {
int kLen;
u8 *pKey;
int vLen;
u8 *pVal;
SPgno pgno;
u8 *pBuf;
u8 freeKV;
int kLen;
u8 *pKey;
int vLen;
u8 *pVal;
SPgno pgno;
u8 *pBuf;
u8 freeKV;
SArray *ofps;
} SCellDecoder;
struct SBTC {
......@@ -198,9 +199,10 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn);
int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPage)(SPage *, void *, int), void *arg,
TXN *pTxn);
void tdbPagerReturnPage(SPager *pPager, SPage *pPage, TXN *pTxn);
int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno);
int tdbPagerRestoreJournals(SPager *pPager);
int tdbPagerRollback(SPager *pPager);
int tdbPagerInsertFreePage(SPager *pPager, SPage *pPage, TXN *pTxn);
// int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno);
int tdbPagerRestoreJournals(SPager *pPager);
int tdbPagerRollback(SPager *pPager);
// tdbPCache.c ====================================
#define TDB_PCACHE_PAGE \
......@@ -373,6 +375,7 @@ static inline SCell *tdbPageGetCell(SPage *pPage, int idx) {
#ifdef USE_MAINDB
#define TDB_MAINDB_NAME "main.tdb"
#define TDB_FREEDB_NAME "_free.db"
#endif
struct STDB {
......@@ -386,6 +389,7 @@ struct STDB {
SPager **pgrHash;
#ifdef USE_MAINDB
TTB *pMainDb;
TTB *pFreeDb;
#endif
int64_t txnId;
};
......@@ -403,6 +407,7 @@ struct SPager {
SRBTree rbt;
// u8 inTran;
TXN *pActiveTxn;
SArray *ofps;
SPager *pNext; // used by TDB
SPager *pHashNext; // used by TDB
#ifdef USE_MAINDB
......
......@@ -14,3 +14,7 @@ target_link_libraries(tdbExOVFLTest tdb gtest gtest_main)
add_executable(tdbPageDefragmentTest "tdbPageDefragmentTest.cpp")
target_link_libraries(tdbPageDefragmentTest tdb gtest gtest_main)
# page recycling testing
add_executable(tdbPageRecycleTest "tdbPageRecycleTest.cpp")
target_link_libraries(tdbPageRecycleTest tdb gtest gtest_main)
......@@ -190,6 +190,15 @@ static void insertOfp(void) {
// commit current transaction
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
closePool(pPool);
// Close a database
tdbTbClose(pDb);
// Close Env
ret = tdbClose(pEnv);
GTEST_ASSERT_EQ(ret, 0);
}
// TEST(TdbOVFLPagesTest, DISABLED_TbInsertTest) {
......@@ -233,6 +242,13 @@ TEST(TdbOVFLPagesTest, TbGetTest) {
tdbFree(pVal);
}
// Close a database
tdbTbClose(pDb);
// Close Env
ret = tdbClose(pEnv);
GTEST_ASSERT_EQ(ret, 0);
}
// TEST(TdbOVFLPagesTest, DISABLED_TbDeleteTest) {
......@@ -334,6 +350,15 @@ tdbBegin(pEnv, &txn);
// commit current transaction
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
closePool(pPool);
// Close a database
tdbTbClose(pDb);
// Close Env
ret = tdbClose(pEnv);
GTEST_ASSERT_EQ(ret, 0);
}
// TEST(tdb_test, DISABLED_simple_insert1) {
......@@ -407,6 +432,8 @@ TEST(tdb_test, simple_insert1) {
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
closePool(pPool);
{ // Query the data
void *pVal = NULL;
int vLen;
......
此差异已折叠。
......@@ -129,6 +129,7 @@ sql DROP INDEX sma_index_3 ;
print ========== step8
sql drop database if exists db;
sleep 2000
sql create database db duration 300;
sql use db;
sql create table stb1(ts timestamp, c_int int, c_bint bigint, c_sint smallint, c_tint tinyint,c_float float, c_double double, c_bool bool,c_binary binary(16), c_nchar nchar(32), c_ts timestamp,c_tint_un tinyint unsigned, c_sint_un smallint unsigned,c_int_un int unsigned, c_bint_un bigint unsigned) tags (t_int int);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册