提交 0c7a4bfa 作者: Benguang Zhao

enh: refactor syncBeginSnapshot and walBeginSnapshot for logRetention

上级 16bc8cb5
......@@ -66,6 +66,7 @@ typedef struct {
int64_t commitVer;
int64_t appliedVer;
int64_t lastVer;
int64_t logRetention;
} SWalVer;
#pragma pack(push, 1)
......@@ -180,7 +181,7 @@ void walFsync(SWal *, bool force);
int32_t walCommit(SWal *, int64_t ver);
int32_t walRollback(SWal *, int64_t ver);
// notify that logs up to (ver - logRetention) can be pruned safely
int32_t walBeginSnapshot(SWal *, int64_t ver);
int32_t walBeginSnapshot(SWal *, int64_t ver, int64_t logRetention);
int32_t walEndSnapshot(SWal *);
int32_t walRestoreFromSnapshot(SWal *, int64_t ver);
// for tq
......
......@@ -472,7 +472,7 @@ int32_t sdbWriteFile(SSdb *pSdb, int32_t delta) {
taosThreadMutexLock(&pSdb->filelock);
if (pSdb->pWal != NULL) {
// code = walBeginSnapshot(pSdb->pWal, pSdb->applyIndex);
// code = walBeginSnapshot(pSdb->pWal, pSdb->applyIndex, 0);
if (pSdb->sync == 0) {
code = 0;
} else {
......
......@@ -270,86 +270,38 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
return -1;
}
SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore);
if (isEmpty || !(lastApplyIndex >= beginIndex && lastApplyIndex <= endIndex)) {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 ", empty:%d, do not delete wal", lastApplyIndex, isEmpty);
syncNodeRelease(pSyncNode);
return 0;
}
int32_t code = 0;
int64_t logRetention = 0;
if (syncNodeIsMnode(pSyncNode)) {
// mnode
int64_t logRetention = SYNC_MNODE_LOG_RETENTION;
SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
int64_t logNum = endIndex - beginIndex;
bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore);
if (isEmpty || (!isEmpty && logNum < logRetention)) {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 ", log-num:%" PRId64 ", empty:%d, do not delete wal",
lastApplyIndex, logNum, isEmpty);
syncNodeRelease(pSyncNode);
return 0;
}
goto _DEL_WAL;
logRetention = SYNC_MNODE_LOG_RETENTION;
} else {
SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore);
if (isEmpty || !(lastApplyIndex >= beginIndex && lastApplyIndex <= endIndex)) {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 ", empty:%d, do not delete wal", lastApplyIndex, isEmpty);
syncNodeRelease(pSyncNode);
return 0;
}
// vnode
if (pSyncNode->replicaNum > 1) {
// multi replicas
logRetention = SYNC_VNODE_LOG_RETENTION;
}
}
lastApplyIndex = TMAX(lastApplyIndex - SYNC_VNODE_LOG_RETENTION, beginIndex - 1);
if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode);
for (int32_t i = 0; i < pSyncNode->peersNum; ++i) {
int64_t matchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[i]));
if (lastApplyIndex > matchIndex) {
sNTrace(pSyncNode,
"new-snapshot-index:%" PRId64 " is greater than match-index:%" PRId64
" of dnode:%d, do not delete wal",
lastApplyIndex, matchIndex, DID(&pSyncNode->peersId[i]));
syncNodeRelease(pSyncNode);
return 0;
}
}
} else if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) {
if (lastApplyIndex > pSyncNode->minMatchIndex) {
sNTrace(pSyncNode,
"new-snapshot-index:%" PRId64 " is greater than min-match-index:%" PRId64 ", do not delete wal",
lastApplyIndex, pSyncNode->minMatchIndex);
syncNodeRelease(pSyncNode);
return 0;
}
} else if (pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE) {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 " candidate, do not delete wal", lastApplyIndex);
syncNodeRelease(pSyncNode);
return 0;
} else {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 " unknown state, do not delete wal", lastApplyIndex);
syncNodeRelease(pSyncNode);
return 0;
}
goto _DEL_WAL;
} else {
// one replica
goto _DEL_WAL;
if (pSyncNode->replicaNum > 1) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER && pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER) {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 " candidate or unknown state, do not delete wal",
lastApplyIndex);
syncNodeRelease(pSyncNode);
return 0;
}
logRetention = TMAX(logRetention, lastApplyIndex - pSyncNode->minMatchIndex);
}
_DEL_WAL:
......@@ -366,7 +318,7 @@ _DEL_WAL:
atomic_store_64(&pSyncNode->snapshottingIndex, lastApplyIndex);
pSyncNode->snapshottingTime = taosGetTimestampMs();
code = walBeginSnapshot(pData->pWal, lastApplyIndex);
code = walBeginSnapshot(pData->pWal, lastApplyIndex, logRetention);
if (code == 0) {
sNTrace(pSyncNode, "wal snapshot begin, index:%" PRId64 ", last apply index:%" PRId64,
pSyncNode->snapshottingIndex, lastApplyIndex);
......
......@@ -247,21 +247,23 @@ static FORCE_INLINE int32_t walCheckAndRoll(SWal *pWal) {
return 0;
}
int32_t walBeginSnapshot(SWal *pWal, int64_t ver) {
int32_t walBeginSnapshot(SWal *pWal, int64_t ver, int64_t logRetention) {
taosThreadMutexLock(&pWal->mutex);
ASSERT(logRetention >= 0);
pWal->vers.verInSnapshotting = ver;
wDebug("vgId:%d, wal begin snapshot for version %" PRId64 ", first ver %" PRId64 ", last ver %" PRId64,
pWal->cfg.vgId, ver, pWal->vers.firstVer, pWal->vers.lastVer);
pWal->vers.logRetention = logRetention;
wDebug("vgId:%d, wal begin snapshot for version %" PRId64 ", log retention %" PRId64 " first ver %" PRId64
", last ver %" PRId64,
pWal->cfg.vgId, ver, pWal->vers.logRetention, pWal->vers.firstVer, pWal->vers.lastVer);
// check file rolling
if (pWal->cfg.retentionPeriod == 0) {
if (walGetLastFileSize(pWal) != 0) {
if (walRollImpl(pWal) < 0) {
wError("vgId:%d, failed to roll wal files since %s", pWal->cfg.vgId, terrstr());
goto _err;
}
if (walGetLastFileSize(pWal) != 0) {
if (walRollImpl(pWal) < 0) {
wError("vgId:%d, failed to roll wal files since %s", pWal->cfg.vgId, terrstr());
goto _err;
}
}
taosThreadMutexUnlock(&pWal->mutex);
return 0;
......@@ -275,8 +277,9 @@ int32_t walEndSnapshot(SWal *pWal) {
taosThreadMutexLock(&pWal->mutex);
int64_t ver = pWal->vers.verInSnapshotting;
wDebug("vgId:%d, wal end snapshot for version %" PRId64 ", first ver %" PRId64 ", last ver %" PRId64, pWal->cfg.vgId,
ver, pWal->vers.firstVer, pWal->vers.lastVer);
wDebug("vgId:%d, wal end snapshot for version %" PRId64 ", log retention %" PRId64 " first ver %" PRId64
", last ver %" PRId64,
pWal->cfg.vgId, ver, pWal->vers.logRetention, pWal->vers.firstVer, pWal->vers.lastVer);
if (ver == -1) {
code = -1;
......@@ -286,6 +289,7 @@ int32_t walEndSnapshot(SWal *pWal) {
pWal->vers.snapshotVer = ver;
int ts = taosGetTimestampSec();
ver = TMAX(ver - pWal->vers.logRetention, pWal->vers.firstVer - 1);
void *pIter = NULL;
while (1) {
pIter = taosHashIterate(pWal->pRefHash, pIter);
......
......@@ -264,7 +264,7 @@ TEST_F(WalCleanEnv, rollbackMultiFile) {
ASSERT_EQ(code, 0);
ASSERT_EQ(pWal->vers.lastVer, i);
if (i == 5) {
walBeginSnapshot(pWal, i);
walBeginSnapshot(pWal, i, 0);
walEndSnapshot(pWal);
}
}
......@@ -301,7 +301,7 @@ TEST_F(WalCleanDeleteEnv, roll) {
ASSERT_EQ(pWal->vers.commitVer, i);
}
walBeginSnapshot(pWal, i - 1);
walBeginSnapshot(pWal, i - 1, 0);
ASSERT_EQ(pWal->vers.verInSnapshotting, i - 1);
walEndSnapshot(pWal);
ASSERT_EQ(pWal->vers.snapshotVer, i - 1);
......@@ -317,7 +317,7 @@ TEST_F(WalCleanDeleteEnv, roll) {
ASSERT_EQ(pWal->vers.commitVer, i);
}
code = walBeginSnapshot(pWal, i - 1);
code = walBeginSnapshot(pWal, i - 1, 0);
ASSERT_EQ(code, 0);
code = walEndSnapshot(pWal);
ASSERT_EQ(code, 0);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册