diff --git a/src/inc/tsync.h b/src/inc/tsync.h index 379c877b266b6026ea9d9ee55f76ecb24fca1a44..e5aa07fe7b9800e9bcdf0c877a0a0a2ce522f1ba 100644 --- a/src/inc/tsync.h +++ b/src/inc/tsync.h @@ -79,6 +79,9 @@ typedef void (*FStopSyncFile)(int32_t vgId, uint64_t fversion); // get file version typedef int32_t (*FGetVersion)(int32_t vgId, uint64_t *fver, uint64_t *vver); +// reset version +typedef int32_t (*FResetVersion)(int32_t vgId, uint64_t fver); + typedef int32_t (*FSendFile)(void *tsdb, SOCKET socketFd); typedef int32_t (*FRecvFile)(void *tsdb, SOCKET socketFd); @@ -96,6 +99,7 @@ typedef struct { FStartSyncFile startSyncFileFp; FStopSyncFile stopSyncFileFp; FGetVersion getVersionFp; + FResetVersion resetVersionFp; FSendFile sendFileFp; FRecvFile recvFileFp; } SSyncInfo; diff --git a/src/inc/twal.h b/src/inc/twal.h index 1645de77aacf020048dc21cd47907163089f269f..bce398d6f95518379c1cd4c0d95a4f6324aab4d7 100644 --- a/src/inc/twal.h +++ b/src/inc/twal.h @@ -65,6 +65,7 @@ void walFsync(twalh, bool forceFsync); int32_t walRestore(twalh, void *pVnode, FWalWrite writeFp); int32_t walGetWalFile(twalh, char *fileName, int64_t *fileId); uint64_t walGetVersion(twalh); +void walResetVersion(twalh, uint64_t newVer); #ifdef __cplusplus } diff --git a/src/sync/inc/syncInt.h b/src/sync/inc/syncInt.h index 91613ae35107f0a667798e730f6021687e3ced5e..2b87938474d9385e5d093dcf524be777eb22db27 100644 --- a/src/sync/inc/syncInt.h +++ b/src/sync/inc/syncInt.h @@ -117,6 +117,7 @@ typedef struct SSyncNode { FStartSyncFile startSyncFileFp; FStopSyncFile stopSyncFileFp; FGetVersion getVersionFp; + FResetVersion resetVersionFp; FSendFile sendFileFp; FRecvFile recvFileFp; pthread_mutex_t mutex; diff --git a/src/sync/src/syncMain.c b/src/sync/src/syncMain.c index 956ccdc0730e345d2e0ece3bcf45e4d0e9b3c14d..f018f00848ccb87d23fe210eab007cd1620787c4 100644 --- a/src/sync/src/syncMain.c +++ b/src/sync/src/syncMain.c @@ -182,6 +182,7 @@ int64_t syncStart(const SSyncInfo *pInfo) { pNode->startSyncFileFp = pInfo->startSyncFileFp; pNode->stopSyncFileFp = pInfo->stopSyncFileFp; pNode->getVersionFp = pInfo->getVersionFp; + pNode->resetVersionFp = pInfo->resetVersionFp; pNode->sendFileFp = pInfo->sendFileFp; pNode->recvFileFp = pInfo->recvFileFp; diff --git a/src/sync/src/syncRestore.c b/src/sync/src/syncRestore.c index c0d66316cd5b802ddcaddf1015d4ceca1aa3b2c5..4c2444caa008f4198c7f0721cb4e6956cdc7f89d 100644 --- a/src/sync/src/syncRestore.c +++ b/src/sync/src/syncRestore.c @@ -238,6 +238,7 @@ static int32_t syncRestoreDataStepByStep(SSyncPeer *pPeer) { (*pNode->stopSyncFileFp)(pNode->vgId, fversion); nodeVersion = fversion; + (*pNode->resetVersionFp)(pNode->vgId, fversion); sInfo("%s, start to restore wal, fver:%" PRIu64, pPeer->id, nodeVersion); uint64_t wver = 0; diff --git a/src/vnode/inc/vnodeSync.h b/src/vnode/inc/vnodeSync.h index c9ac25c2274d81cd08c52a77cd3cc76a27c7a0d5..9a7da626f88d8b71830551c493901fea0834b04a 100644 --- a/src/vnode/inc/vnodeSync.h +++ b/src/vnode/inc/vnodeSync.h @@ -30,6 +30,7 @@ void vnodeStopSyncFile(int32_t vgId, uint64_t fversion); void vnodeConfirmForard(int32_t vgId, void *wparam, int32_t code); int32_t vnodeWriteToCache(int32_t vgId, void *wparam, int32_t qtype, void *rparam); int32_t vnodeGetVersion(int32_t vgId, uint64_t *fver, uint64_t *wver); +int32_t vnodeResetVersion(int32_t vgId, uint64_t fver); void vnodeConfirmForward(void *pVnode, uint64_t version, int32_t code); diff --git a/src/vnode/src/vnodeMain.c b/src/vnode/src/vnodeMain.c index ac9536d243dbd2613d42c8b9a5b054ce862df061..c380b60f88f6f3cd91bbda3ba0ce66bab3797fe9 100644 --- a/src/vnode/src/vnodeMain.c +++ b/src/vnode/src/vnodeMain.c @@ -305,6 +305,7 @@ int32_t vnodeOpen(int32_t vgId) { syncInfo.startSyncFileFp = vnodeStartSyncFile; syncInfo.stopSyncFileFp = vnodeStopSyncFile; syncInfo.getVersionFp = vnodeGetVersion; + syncInfo.resetVersionFp = vnodeResetVersion; syncInfo.sendFileFp = tsdbSyncSend; syncInfo.recvFileFp = tsdbSyncRecv; syncInfo.pTsdb = pVnode->tsdb; diff --git a/src/vnode/src/vnodeSync.c b/src/vnode/src/vnodeSync.c index 627783c391d45b37830cb2b6d851fa6dd3261819..1f9c6c721686e6ce6085e5407e0a3fd98c0ee0b6 100644 --- a/src/vnode/src/vnodeSync.c +++ b/src/vnode/src/vnodeSync.c @@ -158,6 +158,22 @@ int32_t vnodeGetVersion(int32_t vgId, uint64_t *fver, uint64_t *wver) { return code; } +int32_t vnodeResetVersion(int32_t vgId, uint64_t fver) { + SVnodeObj *pVnode = vnodeAcquire(vgId); + if (pVnode == NULL) { + vError("vgId:%d, vnode not found while reset version", vgId); + return -1; + } + + pVnode->fversion = fver; + pVnode->version = fver; + walResetVersion(pVnode->wal, fver); + vDebug("vgId:%d, version reset to %" PRIu64, vgId, fver); + + vnodeRelease(pVnode); + return 0; +} + void vnodeConfirmForward(void *vparam, uint64_t version, int32_t code) { SVnodeObj *pVnode = vparam; syncConfirmForward(pVnode->sync, version, code); diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index aeb49830299eb0dcddfbd39a7a838fdc5d45b081..51b770d346208c4c8de7417c8108ac902ce7cbf9 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -446,3 +446,16 @@ uint64_t walGetVersion(twalh param) { return pWal->version; } + +// Wal version in slave (dnode1) must be reset. +// Because after the data file is recovered from peer (dnode2), the new file version in dnode1 may become smaller than origin. +// Some new wal record cannot be written to the wal file in dnode1 for wal version not reset, then fversion and the record in wal file may inconsistent, +// At this time, if dnode2 down, dnode1 switched to master. After dnode2 start and restore data from dnode1, data loss will occur + +void walResetVersion(twalh param, uint64_t newVer) { + SWal *pWal = param; + if (pWal == 0) return; + wDebug("vgId:%d, version reset from %" PRIu64 " to %" PRIu64, pWal->vgId, pWal->version, newVer); + + pWal->version = newVer; +} \ No newline at end of file diff --git a/tests/script/issue/TD-3300.sim b/tests/script/issue/TD-3300.sim new file mode 100644 index 0000000000000000000000000000000000000000..0745ceb8490567b508397b3e99d4748dc41c8971 --- /dev/null +++ b/tests/script/issue/TD-3300.sim @@ -0,0 +1,556 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/deploy.sh -n dnode2 -i 2 +system sh/deploy.sh -n dnode3 -i 3 +system sh/deploy.sh -n dnode4 -i 4 + +system sh/cfg.sh -n dnode1 -c numOfMnodes -v 1 +system sh/cfg.sh -n dnode2 -c numOfMnodes -v 1 +system sh/cfg.sh -n dnode3 -c numOfMnodes -v 1 +system sh/cfg.sh -n dnode4 -c numOfMnodes -v 1 + +system sh/cfg.sh -n dnode1 -c role -v 1 +system sh/cfg.sh -n dnode2 -c role -v 2 +system sh/cfg.sh -n dnode3 -c role -v 2 +system sh/cfg.sh -n dnode4 -c role -v 2 + +system sh/cfg.sh -n dnode1 -c arbitrator -v $arbitrator +system sh/cfg.sh -n dnode2 -c arbitrator -v $arbitrator +system sh/cfg.sh -n dnode3 -c arbitrator -v $arbitrator +system sh/cfg.sh -n dnode4 -c arbitrator -v $arbitrator + +print ============== step0: start tarbitrator +system sh/exec_tarbitrator.sh -s start + +print ============== step1: start dnode1, only deploy mnode +system sh/exec.sh -n dnode1 -s start +sql connect + +print ============== step2: start dnode2/dnode3 +system sh/exec.sh -n dnode2 -s start +system sh/exec.sh -n dnode3 -s start +sql create dnode $hostname2 +sql create dnode $hostname3 + +$x = 0 +step2: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi + +sql show dnodes +print dnode1 $data4_1 +print dnode2 $data4_2 +print dnode3 $data4_3 + +if $data4_1 != ready then + goto step2 +endi +if $data4_2 != ready then + goto step2 +endi +if $data4_3 != ready then + goto step2 +endi + +sleep 1000 + +print ============== step3 +sql create database db replica 2 +sql use db + +sql create table stb (ts timestamp, c1 int, c2 int) tags(t1 int) +sql create table t1 using stb tags(1) +sql insert into t1 values(1577980800000, 1, 5) +sql insert into t1 values(1577980800001, 2, 4) +sql insert into t1 values(1577980800002, 3, 3) +sql insert into t1 values(1577980800003, 4, 2) +sql insert into t1 values(1577980800004, 5, 1) + +sql show db.vgroups +if $data04 != 3 then + return -1 +endi +if $data06 != 2 then + return -1 +endi +if $data05 != master then + return -1 +endi +if $data07 != slave then + return -1 +endi + +sql select * from t1 +if $rows != 5 then + return -1 +endi + +system sh/exec.sh -n dnode2 -s stop -x SIGKILL +system sh/exec.sh -n dnode3 -s stop -x SIGKILL + +print ============== step4 +system sh/exec.sh -n dnode2 -s start +system sh/exec.sh -n dnode3 -s start + +$x = 0 +step4: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi + +sql show dnodes +print dnode1 $data4_1 +print dnode2 $data4_2 +print dnode3 $data4_3 + +if $data4_1 != ready then + goto step4 +endi +if $data4_2 != ready then + goto step4 +endi +if $data4_3 != ready then + goto step4 +endi + +sql show db.vgroups +if $data04 != 3 then + goto step4 +endi +if $data06 != 2 then + goto step4 +endi +if $data05 != master then + goto step4 +endi +if $data07 != slave then + goto step4 +endi + +sql create table t2 using stb tags(1) +sql insert into t2 values(1577980800000, 1, 5) +sql insert into t2 values(1577980800001, 2, 4) +sql insert into t2 values(1577980800002, 3, 3) +sql insert into t2 values(1577980800003, 4, 2) +sql insert into t2 values(1577980800004, 5, 1) + +sql select * from t2 +if $rows != 5 then + return -1 +endi + +print ============== step5 +system sh/exec.sh -n dnode3 -s stop -x SIGKILL + +$x = 0 +step5: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi + +sql show dnodes +print dnode1 $data4_1 +print dnode2 $data4_2 +print dnode3 $data4_3 + +if $data4_1 != ready then + goto step5 +endi +if $data4_2 != ready then + goto step5 +endi +if $data4_3 != offline then + goto step5 +endi + +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi + +sql show db.vgroups +if $data04 != 3 then + goto step5 +endi +if $data06 != 2 then + goto step5 +endi +if $data05 != offline then + goto step5 +endi +if $data07 != master then + goto step5 +endi + +print ============== step6 +sql create table t3 using stb tags(1) +sql insert into t3 values(1577980800000, 1, 5) +sql insert into t3 values(1577980800001, 2, 4) +sql insert into t3 values(1577980800002, 3, 3) +sql insert into t3 values(1577980800003, 4, 2) +sql insert into t3 values(1577980800004, 5, 1) +sql insert into t3 values(1577980800010, 11, 5) +sql insert into t3 values(1577980800011, 12, 4) +sql insert into t3 values(1577980800012, 13, 3) +sql insert into t3 values(1577980800013, 14, 2) +sql insert into t3 values(1577980800014, 15, 1) + +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi +sql select * from t3 +if $rows != 10 then + return -1 +endi + +system sh/exec.sh -n dnode3 -s start + +$x = 0 +step6: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi + +sql show dnodes +print dnode1 $data4_1 +print dnode2 $data4_2 +print dnode3 $data4_3 + +if $data4_1 != ready then + goto step6 +endi +if $data4_2 != ready then + goto step6 +endi +if $data4_3 != ready then + goto step6 +endi + +sql show db.vgroups +if $data04 != 3 then + goto step6 +endi +if $data06 != 2 then + goto step6 +endi +if $data05 != slave then + goto step6 +endi +if $data07 != master then + goto step6 +endi + +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi +sql select * from t3 +if $rows != 10 then + return -1 +endi + +print ============== step7 +sql create table t4 using stb tags(1) +sql insert into t4 values(1577980800000, 1, 5) +sql insert into t4 values(1577980800001, 2, 4) +sql insert into t4 values(1577980800002, 3, 3) +sql insert into t4 values(1577980800003, 4, 2) +sql insert into t4 values(1577980800004, 5, 1) +sql insert into t4 values(1577980800010, 11, 5) +sql insert into t4 values(1577980800011, 12, 4) +sql insert into t4 values(1577980800012, 13, 3) +sql insert into t4 values(1577980800013, 14, 2) +sql insert into t4 values(1577980800014, 15, 1) +sql insert into t4 values(1577980800020, 21, 5) +sql insert into t4 values(1577980800021, 22, 4) +sql insert into t4 values(1577980800022, 23, 3) +sql insert into t4 values(1577980800023, 24, 2) +sql insert into t4 values(1577980800024, 25, 1) + +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi +sql select * from t3 +if $rows != 10 then + return -1 +endi +sql select * from t4 +if $rows != 15 then + return -1 +endi + +system sh/exec.sh -n dnode2 -s stop -x SIGKILL +$x = 0 +step7: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi + +sql show dnodes +print dnode1 $data4_1 +print dnode2 $data4_2 +print dnode3 $data4_3 + +if $data4_1 != ready then + goto step7 +endi +if $data4_2 != offline then + goto step7 +endi +if $data4_3 != ready then + goto step7 +endi + +sql show db.vgroups +if $data04 != 3 then + goto step7 +endi +if $data06 != 2 then + goto step7 +endi +if $data05 != master then + goto step7 +endi +if $data07 != offline then + goto step7 +endi + +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi +sql select * from t3 +if $rows != 10 then + return -1 +endi +sql select * from t4 +if $rows != 15 then + return -1 +endi + +print ============== step8 +sql create table t5 using stb tags(1) +sql insert into t5 values(1577980800000, 1, 5) +sql insert into t5 values(1577980800001, 2, 4) +sql insert into t5 values(1577980800002, 3, 3) +sql insert into t5 values(1577980800003, 4, 2) +sql insert into t5 values(1577980800004, 5, 1) +sql insert into t5 values(1577980800010, 11, 5) + +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi +sql select * from t3 +if $rows != 10 then + return -1 +endi +sql select * from t4 +if $rows != 15 then + return -1 +endi +sql select * from t5 +if $rows != 6 then + return -1 +endi + +system sh/exec.sh -n dnode2 -s start +$x = 0 +step8: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi + +sql show dnodes +print dnode1 $data4_1 +print dnode2 $data4_2 +print dnode3 $data4_3 + +if $data4_1 != ready then + goto step8 +endi +if $data4_2 != ready then + goto step8 +endi +if $data4_3 != ready then + goto step8 +endi + +sql show db.vgroups +if $data04 != 3 then + goto step8 +endi +if $data06 != 2 then + goto step8 +endi +if $data05 != master then + goto step8 +endi +if $data07 != slave then + goto step8 +endi + +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi +sql select * from t3 +if $rows != 10 then + return -1 +endi +sql select * from t4 +if $rows != 15 then + return -1 +endi +sql select * from t5 +if $rows != 6 then + return -1 +endi + +print ============== step9 +sql create table t6 using stb tags(1) +sql insert into t6 values(1577980800000, 1, 5) +sql insert into t6 values(1577980800001, 2, 4) +sql insert into t6 values(1577980800002, 3, 3) +sql insert into t6 values(1577980800003, 4, 2) +sql insert into t6 values(1577980800004, 5, 1) +sql insert into t6 values(1577980800010, 11, 5) +sql insert into t6 values(1577980800011, 12, 4) + +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi +sql select * from t3 +if $rows != 10 then + return -1 +endi +sql select * from t4 +if $rows != 15 then + return -1 +endi +sql select * from t5 +if $rows != 6 then + return -1 +endi +sql select * from t6 +if $rows != 7 then + return -1 +endi + +system sh/exec.sh -n dnode3 -s stop -x SIGKILL +$x = 0 +step9: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi + +sql show dnodes +print dnode1 $data4_1 +print dnode2 $data4_2 +print dnode3 $data4_3 + +if $data4_1 != ready then + goto step9 +endi +if $data4_2 != ready then + goto step9 +endi +if $data4_3 != offline then + goto step9 +endi + +print ============== 2 +sql show db.vgroups + +if $data04 != 3 then + goto step7 +endi +if $data06 != 2 then + goto step7 +endi +if $data05 != offline then + goto step7 +endi +if $data07 != master then + goto step7 +endi + +print ============== 3 +sql select * from t1 +if $rows != 5 then + return -1 +endi +sql select * from t2 +if $rows != 5 then + return -1 +endi +sql select * from t3 +if $rows != 10 then + return -1 +endi +sql select * from t4 +if $rows != 15 then + return -1 +endi +sql select * from t5 +if $rows != 6 then + return -1 +endi +sql select * from t6 +if $rows != 7 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop +system sh/exec.sh -n dnode2 -s stop +system sh/exec.sh -n dnode3 -s stop