diff --git a/include/common/tdatablock.h b/include/common/tdatablock.h index 7839859e8b0e6b37d4184b0236a6c7620c447595..9ca67056c69728c6da86f70f53dd436405ed688b 100644 --- a/include/common/tdatablock.h +++ b/include/common/tdatablock.h @@ -246,7 +246,7 @@ void blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag); // for debug char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** dumpBuf); -int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SArray* pDataBlocks, STSchema* pTSchema, int32_t vgId, +int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SSDataBlock* pDataBlocks, STSchema* pTSchema, int32_t vgId, tb_uid_t suid); char* buildCtbNameByGroupId(const char* stbName, uint64_t groupId); diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 20dc04631e0aee7ae2d694f8dbe0eb4b048cb17c..9ddb8720078b5cf0fb0f486c25b1afc44b3937cc 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -200,6 +200,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_CANCEL_SMA, "vnode-cancel-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_SMA, "vnode-drop-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT_RSMA, "vnode-submit-rsma", SSubmitReq, SSubmitRsp) + TD_DEF_MSG_TYPE(TDMT_VND_FETCH_RSMA, "vnode-fetch-rsma", SRSmaFetchMsg, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DELETE, "delete-data", SVDeleteReq, SVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_REPLICA, "alter-replica", NULL, NULL) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 302874962e2be899d0df5a0142210b1ecb1c12db..bf33976c08ea4fe4d02ee9d0326e80271980ee45 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1874,21 +1874,20 @@ char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) * @brief TODO: Assume that the final generated result it less than 3M * * @param pReq - * @param pDataBlocks + * @param pDataBlock * @param vgId - * @param suid // TODO: check with Liao whether suid response is reasonable + * @param suid * - * TODO: colId should be set */ -int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SArray* pDataBlocks, STSchema* pTSchema, int32_t vgId, +int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SSDataBlock* pDataBlock, STSchema* pTSchema, int32_t vgId, tb_uid_t suid) { - int32_t sz = taosArrayGetSize(pDataBlocks); int32_t bufSize = sizeof(SSubmitReq); + int32_t sz = 1; for (int32_t i = 0; i < sz; ++i) { - SDataBlockInfo* pBlkInfo = &((SSDataBlock*)taosArrayGet(pDataBlocks, i))->info; + const SDataBlockInfo* pBlkInfo = &pDataBlock->info; - int32_t numOfCols = taosArrayGetSize(pDataBlocks); - bufSize += pBlkInfo->rows * (TD_ROW_HEAD_LEN + pBlkInfo->rowSize + BitmapLen(numOfCols)); + int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock); + bufSize += pBlkInfo->rows * (TD_ROW_HEAD_LEN + pBlkInfo->rowSize + BitmapLen(colNum)); bufSize += sizeof(SSubmitBlk); } @@ -1905,7 +1904,6 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SArray* pDataBlocks tdSRowInit(&rb, pTSchema->version); for (int32_t i = 0; i < sz; ++i) { - SSDataBlock* pDataBlock = taosArrayGet(pDataBlocks, i); int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock); int32_t rows = pDataBlock->info.rows; // int32_t rowSize = pDataBlock->info.rowSize; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index f836cd76acb25e4aa93e7e47bcc4176ddf9788ca..a0f02d96f91913bbe127b992052ecd03a2008b61 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -401,7 +401,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { tsNumOfVnodeWriteThreads = TMAX(tsNumOfVnodeWriteThreads, 1); if (cfgAddInt32(pCfg, "numOfVnodeWriteThreads", tsNumOfVnodeWriteThreads, 1, 1024, 0) != 0) return -1; - tsNumOfVnodeSyncThreads = tsNumOfCores; + // tsNumOfVnodeSyncThreads = tsNumOfCores; + tsNumOfVnodeSyncThreads = 32; tsNumOfVnodeSyncThreads = TMAX(tsNumOfVnodeSyncThreads, 1); if (cfgAddInt32(pCfg, "numOfVnodeSyncThreads", tsNumOfVnodeSyncThreads, 1, 1024, 0) != 0) return -1; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index eca61dd960eaf6fd9b0dfbd0d9bdc4de698e8c77..1b799b1e5e39a5119b868ef273e5764c0418c892 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -347,6 +347,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_TABLES_META, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_CANCEL_TASK, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_DROP_TASK, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_FETCH_RSMA, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_CREATE_STB, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_TTL_TABLE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_STB, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 4c6320ecb54ee6be38fe8f6f30d44481f53ad476..47f7d209b33d461786a78800e660a493eba6e330 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -187,6 +187,7 @@ int32_t smaAsyncPreCommit(SSma* pSma); int32_t smaAsyncCommit(SSma* pSma); int32_t smaAsyncPostCommit(SSma* pSma); int32_t smaDoRetention(SSma* pSma, int64_t now); +int32_t smaProcessFetch(SSma *pSma, void* pMsg); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index fd2222c5e4e7aa2c38ffd9788446f4f025cab71f..6b882251f441449d25159c367dd9a27c56385356 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -36,19 +36,17 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputT int8_t level); static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); - -static int32_t tdRSmaFetchAndSubmitResult(qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, int64_t suid, - SRSmaStat *pStat, int8_t blkType); -static void tdRSmaFetchTrigger(void *param, void *tmrId); - -static int32_t tdRSmaQTaskInfoIterInit(SRSmaQTaskInfoIter *pIter, STFile *pTFile); -static int32_t tdRSmaQTaskInfoIterNextBlock(SRSmaQTaskInfoIter *pIter, bool *isFinish); -static int32_t tdRSmaQTaskInfoRestore(SSma *pSma, int8_t type, SRSmaQTaskInfoIter *pIter); -static int32_t tdRSmaQTaskInfoItemRestore(SSma *pSma, const SRSmaQTaskInfoItem *infoItem); - -static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables); -static int32_t tdRSmaRestoreQTaskInfoReload(SSma *pSma, int8_t type, int64_t qTaskFileVer); -static int32_t tdRSmaRestoreTSDataReload(SSma *pSma); +static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, + int64_t suid, int8_t blkType); +static void tdRSmaFetchTrigger(void *param, void *tmrId); +static int32_t tdRSmaFetchSend(SSma *pSma, SRSmaInfo *pInfo, int8_t level); +static int32_t tdRSmaQTaskInfoIterInit(SRSmaQTaskInfoIter *pIter, STFile *pTFile); +static int32_t tdRSmaQTaskInfoIterNextBlock(SRSmaQTaskInfoIter *pIter, bool *isFinish); +static int32_t tdRSmaQTaskInfoRestore(SSma *pSma, int8_t type, SRSmaQTaskInfoIter *pIter); +static int32_t tdRSmaQTaskInfoItemRestore(SSma *pSma, const SRSmaQTaskInfoItem *infoItem); +static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables); +static int32_t tdRSmaRestoreQTaskInfoReload(SSma *pSma, int8_t type, int64_t qTaskFileVer); +static int32_t tdRSmaRestoreTSDataReload(SSma *pSma); static SRSmaInfo *tdGetRSmaInfoByItem(SRSmaInfoItem *pItem) { // adapt accordingly if definition of SRSmaInfo update @@ -604,11 +602,8 @@ _end: return code; } -static int32_t tdRSmaFetchAndSubmitResult(qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, int64_t suid, - SRSmaStat *pStat, int8_t blkType) { - SArray *pResult = NULL; - SSma *pSma = pStat->pSma; - +static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, + int64_t suid, int8_t blkType) { while (1) { SSDataBlock *output = NULL; uint64_t ts; @@ -619,30 +614,20 @@ static int32_t tdRSmaFetchAndSubmitResult(qTaskInfo_t taskInfo, SRSmaInfoItem *p pItem->level, terrstr(code)); goto _err; } - if (!output) { - break; - } - if (!pResult) { - pResult = taosArrayInit(1, sizeof(SSDataBlock)); - if (!pResult) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - taosArrayPush(pResult, output); - - if (taosArrayGetSize(pResult) > 0) { -#if 1 + if (output) { +#if 0 char flag[10] = {0}; snprintf(flag, 10, "level %" PRIi8, pItem->level); + SArray *pResult = taosArrayInit(1, sizeof(SSDataBlock)); + taosArrayPush(pResult, output); blockDebugShowDataBlocks(pResult, flag); + taosArrayDestroy(pResult); #endif STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); SSubmitReq *pReq = NULL; // TODO: the schema update should be handled later(TD-17965) - if (buildSubmitReqFromDataBlock(&pReq, pResult, pTSchema, SMA_VID(pSma), suid) < 0) { + if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, SMA_VID(pSma), suid) < 0) { smaError("vgId:%d, build submit req for rsma stable %" PRIi64 " level %" PRIi8 " failed since %s", SMA_VID(pSma), suid, pItem->level, terrstr()); goto _err; @@ -659,18 +644,17 @@ static int32_t tdRSmaFetchAndSubmitResult(qTaskInfo_t taskInfo, SRSmaInfoItem *p SMA_VID(pSma), suid, pItem->level, output->info.version); taosMemoryFreeClear(pReq); - taosArrayClear(pResult); } else if (terrno == 0) { smaDebug("vgId:%d, no rsma %" PRIi8 " data fetched yet", SMA_VID(pSma), pItem->level); + break; } else { smaDebug("vgId:%d, no rsma %" PRIi8 " data fetched since %s", SMA_VID(pSma), pItem->level, terrstr()); + goto _err; } } - tdDestroySDataBlockArray(pResult); return TSDB_CODE_SUCCESS; _err: - tdDestroySDataBlockArray(pResult); return TSDB_CODE_FAILED; } @@ -694,11 +678,9 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputType return TSDB_CODE_FAILED; } - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); - SRSmaStat *pStat = SMA_RSMA_STAT(pEnv->pStat); SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - tdRSmaFetchAndSubmitResult(RSMA_INFO_QTASK(pInfo, idx), pItem, pInfo->pTSchema, suid, pStat, + tdRSmaFetchAndSubmitResult(pSma, RSMA_INFO_QTASK(pInfo, idx), pItem, pInfo->pTSchema, suid, STREAM_INPUT__DATA_SUBMIT); atomic_store_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE); @@ -724,11 +706,13 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { SRSmaInfo *pRSmaInfo = NULL; if (!pEnv) { + terrno = TSDB_CODE_RSMA_INVALID_ENV; return NULL; } pStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); if (!pStat || !RSMA_INFO_HASH(pStat)) { + terrno = TSDB_CODE_RSMA_INVALID_STAT; return NULL; } @@ -743,12 +727,12 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); return pRSmaInfo; } + taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); if (RSMA_COMMIT_STAT(pStat) == 0) { // return NULL if not in committing stat - taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); return NULL; } - taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); + // clone the SRSmaInfo from iRsmaInfoHash to rsmaInfoHash if in committing stat SRSmaInfo *pCowRSmaInfo = NULL; @@ -779,7 +763,7 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { ASSERT(!pCowRSmaInfo); } - if(pCowRSmaInfo) { + if (pCowRSmaInfo) { tdRefRSmaInfo(pSma, pCowRSmaInfo); } // unlock @@ -1323,7 +1307,7 @@ _err: } /** - * @brief trigger to get rsma result + * @brief trigger to get rsma result in async mode * * @param param * @param tmrId @@ -1357,8 +1341,7 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { " refId:%d", SMA_VID(pSma), pItem->level, rsmaTriggerStat, smaMgmt.rsetId, pRSmaInfo->refId); if (rsmaTriggerStat == TASK_TRIGGER_STAT_PAUSED) { - taosTmrReset(tdRSmaFetchTrigger, pItem->maxDelay > 5000 ? 5000 : pItem->maxDelay, pItem, smaMgmt.tmrHandle, - &pItem->tmrId); + taosTmrReset(tdRSmaFetchTrigger, 5000, pItem, smaMgmt.tmrHandle, &pItem->tmrId); } return; } @@ -1372,16 +1355,8 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { case TASK_TRIGGER_STAT_ACTIVE: { smaDebug("vgId:%d, fetch rsma level %" PRIi8 " data for table:%" PRIi64 " since stat is active", SMA_VID(pSma), pItem->level, pRSmaInfo->suid); - - // sync procedure => async process - - SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; - qTaskInfo_t taskInfo = pRSmaInfo->taskInfo[pItem->level - 1]; - qSetMultiStreamInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK); - tdRSmaFetchAndSubmitResult(taskInfo, pItem, pRSmaInfo->pTSchema, pRSmaInfo->suid, pStat, - STREAM_INPUT__DATA_BLOCK); - tdCleanupStreamInputDataBlock(taskInfo); - + // async process + tdRSmaFetchSend(pSma, pRSmaInfo, pItem->level); } break; case TASK_TRIGGER_STAT_PAUSED: { smaDebug("vgId:%d, not fetch rsma level %" PRIi8 " data for table:%" PRIi64 " since stat is paused", @@ -1404,3 +1379,90 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { _end: tdReleaseSmaRef(smaMgmt.rsetId, pRSmaInfo->refId); } + +/** + * @brief put rsma fetch msg to fetch queue + * + * @param pSma + * @param pInfo + * @param level + * @return int32_t + */ +int32_t tdRSmaFetchSend(SSma *pSma, SRSmaInfo *pInfo, int8_t level) { + SRSmaFetchMsg fetchMsg = {.refId = pInfo->refId, .suid = pInfo->suid, .level = level}; + int32_t ret = 0; + int32_t contLen = 0; + SEncoder encoder = {0}; + tEncodeSize(tEncodeSRSmaFetchMsg, &fetchMsg, contLen, ret); + if (ret < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tEncoderClear(&encoder); + goto _err; + } + + void *pBuf = rpcMallocCont(contLen); + tEncoderInit(&encoder, pBuf, contLen); + if (tEncodeSRSmaFetchMsg(&encoder, &fetchMsg) < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tEncoderClear(&encoder); + } + tEncoderClear(&encoder); + SRpcMsg rpcMsg = { + .code = 0, + .msgType = TDMT_VND_FETCH_RSMA, + .pCont = pBuf, + .contLen = contLen, + }; + + if ((terrno = tmsgPutToQueue(&pSma->pVnode->msgCb, FETCH_QUEUE, &rpcMsg)) != 0) { + smaError("vgId:%d, failed to put rsma fetch msg into fetch-queue for suid:%d level:%" PRIi8 " since %s", + SMA_VID(pSma), pInfo->suid, level, terrstr()); + goto _err; + } + + return TSDB_CODE_SUCCESS; +_err: + return TSDB_CODE_FAILED; +} + +int32_t smaProcessFetch(SSma *pSma, void *pMsg) { + SRpcMsg *pRpcMsg = (SRpcMsg *)pMsg; + SRSmaFetchMsg req = {0}; + SDecoder decoder = {0}; + SRSmaInfo *pInfo = NULL; + SRSmaInfoItem *pItem = NULL; + + tDecoderInit(&decoder, pRpcMsg->pCont, pRpcMsg->contLen); + if (tDecodeSRSmaFetchMsg(&decoder, &req) < 0) { + terrno = TSDB_CODE_INVALID_MSG; + goto _err; + } + + pInfo = tdAcquireRSmaInfoBySuid(pSma, req.suid); + if (!pInfo) { + smaDebug("vgId:%d, failed to process rsma fetch msg since Empty rsma info", SMA_VID(pSma)); + goto _err; + } + + pItem = RSMA_INFO_ITEM(pInfo, req.level - 1); + + SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; + qTaskInfo_t taskInfo = RSMA_INFO_QTASK(pInfo, req.level - 1); + if ((terrno = qSetMultiStreamInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { + goto _err; + } + if (tdRSmaFetchAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid, STREAM_INPUT__DATA_BLOCK) < 0) { + goto _err; + } + + tdCleanupStreamInputDataBlock(taskInfo); + + tdReleaseRSmaInfo(pSma, pInfo); + tDecoderClear(&decoder); + return TSDB_CODE_SUCCESS; +_err: + tdReleaseRSmaInfo(pSma, pInfo); + tDecoderClear(&decoder); + smaError("vgId:%d, failed to process rsma fetch msg since %s", SMA_VID(pSma), terrstr()); + return TSDB_CODE_FAILED; +} diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 15cf183b2ad5cea0706388d3c3ee92e0911319b0..0f8ec07016d018f1a57e676bc5e5304d613122bd 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -325,6 +325,8 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return vnodeGetTableCfg(pVnode, pMsg, true); case TDMT_VND_BATCH_META: return vnodeGetBatchMeta(pVnode, pMsg); + case TDMT_VND_FETCH_RSMA: + return smaProcessFetch(pVnode->pSma, pMsg); case TDMT_VND_CONSUME: return tqProcessPollReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_RUN: diff --git a/source/libs/sync/inc/syncRaftLog.h b/source/libs/sync/inc/syncRaftLog.h index 65ec77e38ff10ff77de1d4000515439ad7844ef9..ff59189a9d6a96566e3246a5ed8fa25ca819e440 100644 --- a/source/libs/sync/inc/syncRaftLog.h +++ b/source/libs/sync/inc/syncRaftLog.h @@ -47,6 +47,8 @@ char* logStoreSimple2Str(SSyncLogStore* pLogStore); SyncIndex logStoreFirstIndex(SSyncLogStore* pLogStore); +SyncIndex logStoreWalCommitVer(SSyncLogStore* pLogStore); + // for debug void logStorePrint(SSyncLogStore* pLogStore); void logStorePrint2(char* s, SSyncLogStore* pLogStore); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index f31f3dd1aea037b36846cf0fda55a564220e12fb..4f93d8197dc801ae86619858b15d9055059565eb 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -357,16 +357,14 @@ static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) { code = ths->pLogStore->syncLogTruncate(ths->pLogStore, delBegin); ASSERT(code == 0); - char eventLog[128]; - snprintf(eventLog, sizeof(eventLog), "log truncate, from %" PRId64 " to %" PRId64, delBegin, delEnd); - syncNodeEventLog(ths, eventLog); - logStoreSimpleLog2("after syncNodeMakeLogSame", ths->pLogStore); - return code; } +// if FromIndex > walCommitVer, return 0 +// else return num of pass entries static int32_t syncNodeDoMakeLogSame(SSyncNode* ths, SyncIndex FromIndex) { - int32_t code; + int32_t code = 0; + int32_t pass = 0; SyncIndex delBegin = FromIndex; SyncIndex delEnd = ths->pLogStore->syncLogLastIndex(ths->pLogStore); @@ -398,16 +396,31 @@ static int32_t syncNodeDoMakeLogSame(SSyncNode* ths, SyncIndex FromIndex) { } } + // update delete begin + SyncIndex walCommitVer = logStoreWalCommitVer(ths->pLogStore); + + if (delBegin <= walCommitVer) { + delBegin = walCommitVer + 1; + pass = walCommitVer - delBegin + 1; + + do { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "update delete begin to %ld", delBegin); + syncNodeEventLog(ths, logBuf); + } while (0); + } + // delete confict entries code = ths->pLogStore->syncLogTruncate(ths->pLogStore, delBegin); ASSERT(code == 0); - char eventLog[128]; - snprintf(eventLog, sizeof(eventLog), "log truncate, from %" PRId64 " to %" PRId64, delBegin, delEnd); - syncNodeEventLog(ths, eventLog); - logStoreSimpleLog2("after syncNodeMakeLogSame", ths->pLogStore); + do { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "make log same from:%ld, delbegin:%ld, pass:%d", FromIndex, delBegin, pass); + syncNodeEventLog(ths, logBuf); + } while (0); - return code; + return pass; } int32_t syncNodePreCommit(SSyncNode* ths, SSyncRaftEntry* pEntry, int32_t code) { @@ -543,31 +556,34 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc SOffsetAndContLen* metaTableArr = syncAppendEntriesBatchMetaTableArray(pMsg); if (hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex) { - // make log same - do { - SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex; + int32_t pass = 0; + SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); + bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex; - if (hasExtraEntries) { - // make log same, rollback deleted entries - code = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1); - ASSERT(code == 0); - } - - } while (0); + // make log same + if (hasExtraEntries) { + // make log same, rollback deleted entries + pass = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1); + ASSERT(pass >= 0); + } // append entry batch - for (int32_t i = 0; i < pMsg->dataCount; ++i) { - SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset); - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - return -1; - } + if (pass == 0) { + // assert! no batch + ASSERT(pMsg->dataCount <= 1); + + for (int32_t i = 0; i < pMsg->dataCount; ++i) { + SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset); + code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); + if (code != 0) { + return -1; + } - code = syncNodePreCommit(ths, pAppendEntry, 0); - ASSERT(code == 0); + code = syncNodePreCommit(ths, pAppendEntry, 0); + ASSERT(code == 0); - // syncEntryDestory(pAppendEntry); + // syncEntryDestory(pAppendEntry); + } } // fsync once @@ -670,25 +686,33 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc syncLogRecvAppendEntriesBatch(ths, pMsg, "really match"); + int32_t pass = 0; + if (hasExtraEntries) { // make log same, rollback deleted entries - code = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1); - ASSERT(code == 0); + pass = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1); + ASSERT(pass >= 0); } if (hasAppendEntries) { // append entry batch - for (int32_t i = 0; i < pMsg->dataCount; ++i) { - SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset); - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - return -1; - } + if (pass == 0) { + // assert! no batch + ASSERT(pMsg->dataCount <= 1); + + // append entry batch + for (int32_t i = 0; i < pMsg->dataCount; ++i) { + SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset); + code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); + if (code != 0) { + return -1; + } - code = syncNodePreCommit(ths, pAppendEntry, 0); - ASSERT(code == 0); + code = syncNodePreCommit(ths, pAppendEntry, 0); + ASSERT(code == 0); - // syncEntryDestory(pAppendEntry); + // syncEntryDestory(pAppendEntry); + } } // fsync once diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index a603cfff2762bbd4fea088c9a4ad120d0471fce0..3a94ed9713ba2b12e2ce766b3dfd9e615b309d9f 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -92,6 +92,12 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { } } + // advance commit index as large as possible + SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore); + if (walCommitVer > newCommitIndex) { + newCommitIndex = walCommitVer; + } + // maybe execute fsm if (newCommitIndex > pSyncNode->commitIndex) { SyncIndex beginIndex = pSyncNode->commitIndex + 1; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 78004c0ad601b403ce1046ae41ac6ff995c6e7f9..c17d91182e0f90cbec32766373da5958aff6b31b 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -2409,6 +2409,9 @@ static void syncNodeEqElectTimer(void* param, void* tmrId) { static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) { SSyncNode* pSyncNode = (SSyncNode*)param; + + syncNodeEventLog(pSyncNode, "eq hb timer"); + if (pSyncNode->replicaNum > 1) { if (atomic_load_64(&pSyncNode->heartbeatTimerLogicClockUser) <= atomic_load_64(&pSyncNode->heartbeatTimerLogicClock)) { diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index b575e40d86884c9fdd688db03e4cc8492e9ea0d3..0649e064e45391cfe9082c24264a33b762d1a279 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -305,10 +305,18 @@ static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, return code; } +// truncate semantic static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex) { SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; - int32_t code = walRollback(pWal, fromIndex); + + // need not truncate + SyncIndex wallastVer = walGetLastVer(pWal); + if (fromIndex > wallastVer) { + return 0; + } + + int32_t code = walRollback(pWal, fromIndex); if (code != 0) { int32_t err = terrno; const char* errStr = tstrerror(err); @@ -323,7 +331,7 @@ static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIn // event log do { char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "wal truncate, from-index:%" PRId64, fromIndex); + snprintf(logBuf, sizeof(logBuf), "log truncate, from-index:%" PRId64, fromIndex); syncNodeEventLog(pData->pSyncNode, logBuf); } while (0); @@ -637,6 +645,12 @@ SyncIndex logStoreFirstIndex(SSyncLogStore* pLogStore) { return walGetFirstVer(pWal); } +SyncIndex logStoreWalCommitVer(SSyncLogStore* pLogStore) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + return walGetCommittedVer(pWal); +} + // for debug ----------------- void logStorePrint(SSyncLogStore* pLogStore) { char* serialized = logStore2Str(pLogStore); diff --git a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim index bde56cb862153ba2d9a6efbb2a268c4917701845..86bdbdcded182ac629122d93ea7debf87c43fb81 100644 --- a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim +++ b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim @@ -29,8 +29,8 @@ sql insert into ct1 values(now, 10); sql insert into ct1 values(now+1s, 1); sql insert into ct1 values(now+2s, 100); -print =============== wait maxdelay 15+1 seconds for results -sleep 16000 +print =============== wait maxdelay 15+2 seconds for results +sleep 17000 print =============== select * from retention level 2 from memory sql select * from ct1; diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index 1b54e5a47d91506dab3eae7ab95931c73467b62a..405d22ebddb5524c17936a36beeaa121d9fc8634 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -29,8 +29,8 @@ sql insert into ct1 values(now, 10, 10.0); sql insert into ct1 values(now+1s, 1, 1.0); sql insert into ct1 values(now+2s, 100, 100.0); -print =============== wait maxdelay 5+1 seconds for results -sleep 6000 +print =============== wait maxdelay 5+2 seconds for results +sleep 7000 print =============== select * from retention level 2 from memory sql select * from ct1; @@ -135,8 +135,8 @@ print =============== insert after rsma qtaskinfo recovery sql insert into ct1 values(now, 50, 500.0); sql insert into ct1 values(now+1s, 40, 40.0); -print =============== wait maxdelay 5+1 seconds for results -sleep 6000 +print =============== wait maxdelay 5+2 seconds for results +sleep 7000 print =============== select * from retention level 2 from file and memory after rsma qtaskinfo recovery sql select * from ct1;