diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 89ec9dc6c833a7173d83c67dc386a476ff5bf4ef..005cf36d5ea11e8adaeb61a95a9536c1af746982 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -137,6 +137,7 @@ extern int64_t tsWalFsyncDataSizeLimit; // internal extern int32_t tsTransPullupInterval; extern int32_t tsMqRebalanceInterval; +extern int32_t tsStreamCheckpointTickInterval; extern int32_t tsTtlUnit; extern int32_t tsTtlPushInterval; extern int32_t tsGrantHBInterval; diff --git a/include/common/tmsg.h b/include/common/tmsg.h index a84fa3c9f0cc0343d87f9d062195eaa9793861a5..ad992fd9dbf50f9c789c636238b3d005de788fa8 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1146,6 +1146,13 @@ typedef struct { int32_t tSerializeSMTimerMsg(void* buf, int32_t bufLen, SMTimerReq* pReq); int32_t tDeserializeSMTimerMsg(void* buf, int32_t bufLen, SMTimerReq* pReq); +typedef struct { + int64_t tick; +} SMStreamTickReq; + +int32_t tSerializeSMStreamTickMsg(void* buf, int32_t bufLen, SMStreamTickReq* pReq); +int32_t tDeserializeSMStreamTickMsg(void* buf, int32_t bufLen, SMStreamTickReq* pReq); + typedef struct { int32_t id; uint16_t port; // node sync Port @@ -1748,6 +1755,8 @@ typedef struct { int64_t watermark; int32_t numOfTags; SArray* pTags; // array of SField + // 3.0.20 + int64_t checkpointFreq; // ms } SCMCreateStreamReq; typedef struct { @@ -1947,6 +1956,12 @@ typedef struct { SHashObj* rebSubHash; // SHashObj } SMqDoRebalanceMsg; +typedef struct { + int64_t streamId; + int64_t checkpointId; + char streamName[TSDB_STREAM_FNAME_LEN]; +} SMStreamDoCheckpointMsg; + typedef struct { int64_t status; } SMVSubscribeRsp; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index e80766d249fe4ea3f2134a5a5e7b270dd7522f2e..7833bdf1393c72b988b1553d691e7d8936b14603 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -172,6 +172,8 @@ enum { TD_DEF_MSG_TYPE(TDMT_MND_SERVER_VERSION, "server-version", NULL, 
NULL) TD_DEF_MSG_TYPE(TDMT_MND_UPTIME_TIMER, "uptime-timer", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, "lost-consumer-clear", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_MSG) @@ -241,8 +243,11 @@ enum { TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_RECOVER_FINISH, "vnode-stream-finish", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_RECOVER_FINISH, "stream-recover-finish", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESTORE_CHECKPOINT, "stream-restore-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_MON_MSG) @@ -282,6 +287,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE, "vnode-stream-recover1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, "vnode-stream-recover2", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 
4099551188cc1c8e75a01a5bb0dec177ad559da7..0196dcb0a818479b81f14fd1755d37fd11ffee52 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -275,31 +275,6 @@ typedef struct { SEpSet epSet; } SStreamChildEpInfo; -typedef struct { - int32_t srcNodeId; - int32_t srcChildId; - int64_t stateSaveVer; - int64_t stateProcessedVer; -} SStreamCheckpointInfo; - -typedef struct { - int64_t streamId; - int64_t checkTs; - int32_t checkpointId; // incremental - int32_t taskId; - SArray* checkpointVer; // SArray -} SStreamMultiVgCheckpointInfo; - -typedef struct { - int32_t taskId; - int32_t checkpointId; // incremental -} SStreamCheckpointKey; - -typedef struct { - int32_t taskId; - SArray* checkpointVer; -} SStreamRecoveringState; - typedef struct SStreamTask { int64_t streamId; int32_t taskId; @@ -364,6 +339,10 @@ typedef struct SStreamTask { int64_t checkReqId; SArray* checkReqIds; // shuffle int32_t refCnt; + + int64_t checkpointingId; + int32_t checkpointAlignCnt; + } SStreamTask; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -509,6 +488,60 @@ typedef struct { int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq); int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq); +typedef struct { + int64_t streamId; + int64_t checkpointId; + int32_t taskId; + int32_t nodeId; + int64_t expireTime; +} SStreamCheckpointSourceReq; + +typedef struct { + int64_t streamId; + int64_t checkpointId; + int32_t taskId; + int32_t nodeId; + int64_t expireTime; +} SStreamCheckpointSourceRsp; + +int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq); +int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq); + +int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp); +int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, 
SStreamCheckpointSourceRsp* pRsp); + +typedef struct { + SMsgHead msgHead; + int64_t streamId; + int64_t checkpointId; + int32_t downstreamTaskId; + int32_t downstreamNodeId; + int32_t upstreamTaskId; + int32_t upstreamNodeId; + int32_t childId; + int64_t expireTime; + int8_t taskLevel; +} SStreamCheckpointReq; + +typedef struct { + SMsgHead msgHead; + int64_t streamId; + int64_t checkpointId; + int32_t downstreamTaskId; + int32_t downstreamNodeId; + int32_t upstreamTaskId; + int32_t upstreamNodeId; + int32_t childId; + int64_t expireTime; + int8_t taskLevel; +} SStreamCheckpointRsp; + +int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq); +int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq); + +int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp); +int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp); + typedef struct { int64_t streamId; int32_t downstreamTaskId; @@ -598,18 +631,22 @@ void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t startVer, char* msg, int32_t msgLen); -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); -void streamMetaRemoveTask1(SStreamMeta* pMeta, int32_t taskId); +void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); int32_t streamMetaBegin(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaRollBack(SStreamMeta* pMeta); int32_t streamLoadTasks(SStreamMeta* pMeta); +// checkpoint +int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); +int32_t 
streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq); +int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp); + #ifdef __cplusplus } #endif diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index ab46ba24cffbfbe6e04989cc892c1bb40c856360..2e8cb5e5f739110ce75c94bea22aa820862f97d0 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -167,6 +167,7 @@ int64_t tsWalFsyncDataSizeLimit = (100 * 1024 * 1024L); // internal int32_t tsTransPullupInterval = 2; int32_t tsMqRebalanceInterval = 2; +int32_t tsStreamCheckpointTickInterval = 1; int32_t tsTtlUnit = 86400; int32_t tsTtlPushInterval = 86400; int32_t tsGrantHBInterval = 60; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 24dd73604a985375a655ce305f56265c2bae1d63..8dc4633443e3224254fe72a64e9313f357014c42 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -3748,6 +3748,31 @@ int32_t tDeserializeSMTimerMsg(void *buf, int32_t bufLen, SMTimerReq *pReq) { return 0; } +int32_t tSerializeSMStreamTickMsg(void *buf, int32_t bufLen, SMStreamTickReq *pReq) { + SEncoder encoder = {0}; + tEncoderInit(&encoder, buf, bufLen); + + if (tStartEncode(&encoder) < 0) return -1; + if (tEncodeI64(&encoder, pReq->tick) < 0) return -1; + tEndEncode(&encoder); + + int32_t tlen = encoder.pos; + tEncoderClear(&encoder); + return tlen; +} + +int32_t tDeserializeSMStreamTickMsg(void *buf, int32_t bufLen, SMStreamTickReq *pReq) { + SDecoder decoder = {0}; + tDecoderInit(&decoder, buf, bufLen); + + if (tStartDecode(&decoder) < 0) return -1; + if (tDecodeI64(&decoder, &pReq->tick) < 0) return -1; + tEndDecode(&decoder); + + tDecoderClear(&decoder); + return 0; +} + int32_t tEncodeSReplica(SEncoder *pEncoder, SReplica *pReplica) { if (tEncodeI32(pEncoder, pReplica->id) < 0) return -1; if (tEncodeU16(pEncoder, pReplica->port) < 0) return -1; diff --git 
a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 7ecf60dc2d0de1b0ff9f9e32d64a7f3ff68d2cf8..9961828747f290cf501ceb2c51ee744b4a31349c 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -640,10 +640,14 @@ typedef struct { SArray* tasks; // SArray> SSchemaWrapper outputSchema; SSchemaWrapper tagSchema; + + // 3.0.20 + int64_t checkpointFreq; // ms + int64_t currentTick; // do not serialize } SStreamObj; int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj); -int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj); +int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj, int32_t sver); void tFreeStreamObj(SStreamObj* pObj); typedef struct { @@ -653,15 +657,6 @@ typedef struct { SArray* childInfo; // SArray } SStreamCheckpointObj; -#if 0 -typedef struct { - int64_t uid; - int64_t streamId; - int8_t status; - int8_t stage; -} SStreamRecoverObj; -#endif - #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 2e984212a1a905e722d81e2e2c7c325760812c47..b5c2fb05b3adb3f3e7d0c3ce19f3da055fabdb3c 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -76,11 +76,14 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { if (tEncodeSSchemaWrapper(pEncoder, &pObj->outputSchema) < 0) return -1; + // 3.0.20 + if (tEncodeI64(pEncoder, pObj->checkpointFreq) < 0) return -1; + tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { +int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeCStrTo(pDecoder, pObj->name) < 0) return -1; @@ -139,6 +142,10 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { if (tDecodeSSchemaWrapper(pDecoder, &pObj->outputSchema) < 0) return -1; + // 3.0.20 + if (sver >= 2) { 
+ if (tDecodeI64(pDecoder, &pObj->checkpointFreq) < 0) return -1; + } tEndDecode(pDecoder); return 0; } diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index e6aee3481f0335057253b5420493c6520c577162..9f4c2e048f09b530fa2fac839fd12c192d3a8d5c 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -85,6 +85,21 @@ static void *mndBuildTimerMsg(int32_t *pContLen) { return pReq; } +static void *mndBuildCheckpointTickMsg(int32_t *pContLen, int64_t sec) { + SMStreamTickReq timerReq = { + .tick = sec, + }; + + int32_t contLen = tSerializeSMStreamTickMsg(NULL, 0, &timerReq); + if (contLen <= 0) return NULL; + void *pReq = rpcMallocCont(contLen); + if (pReq == NULL) return NULL; + + tSerializeSMStreamTickMsg(pReq, contLen, &timerReq); + *pContLen = contLen; + return pReq; +} + static void mndPullupTrans(SMnode *pMnode) { int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); @@ -105,7 +120,24 @@ static void mndCalMqRebalance(SMnode *pMnode) { int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); if (pReq != NULL) { - SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen}; + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_TMQ_TIMER, + .pCont = pReq, + .contLen = contLen, + }; + tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); + } +} + +static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) { + int32_t contLen = 0; + void *pReq = mndBuildCheckpointTickMsg(&contLen, sec); + if (pReq != NULL) { + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, + .pCont = pReq, + .contLen = contLen, + }; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } } @@ -224,6 +256,12 @@ static void *mndThreadFp(void *param) { mndCalMqRebalance(pMnode); } +#if 0 + if (sec % tsStreamCheckpointTickInterval == 0) { + mndStreamCheckpointTick(pMnode, sec); + } +#endif + if (sec % tsTelemInterval == (TMIN(60, (tsTelemInterval - 1)))) { 
mndPullupTelem(pMnode); } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 44ff8733fd326e3c28fa1481cbe6f95b06d2c8af..7ee688d220931e3a0bf70e93b99a000e0852b5e1 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -28,7 +28,7 @@ #include "parser.h" #include "tname.h" -#define MND_STREAM_VER_NUMBER 1 +#define MND_STREAM_VER_NUMBER 2 #define MND_STREAM_RESERVE_SIZE 64 static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); @@ -36,6 +36,8 @@ static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pStream, SStreamObj *pNewStream); static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); +static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); +static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); /*static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq);*/ static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq); static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta); @@ -62,6 +64,10 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_DEPLOY_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_DROP_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); + mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_STREAMS, mndRetrieveStream); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_STREAMS, mndCancelGetNextStream); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndRetrieveStreamTask); @@ -127,7 +133,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { int8_t sver = 0; if (sdbGetRawSoftVer(pRaw, 
&sver) != 0) goto STREAM_DECODE_OVER; - if (sver != MND_STREAM_VER_NUMBER) { + if (sver != 1 && sver != 2) { terrno = TSDB_CODE_SDB_INVALID_DATA_VER; goto STREAM_DECODE_OVER; } @@ -147,7 +153,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { SDecoder decoder; tDecoderInit(&decoder, buf, tlen + 1); - if (tDecodeSStreamObj(&decoder, pStream) < 0) { + if (tDecodeSStreamObj(&decoder, pStream, sver) < 0) { tDecoderClear(&decoder); goto STREAM_DECODE_OVER; } @@ -680,93 +686,183 @@ _OVER: tFreeStreamObj(&streamObj); return code; } - -static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { +static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + void *pIter = NULL; SStreamObj *pStream = NULL; - /*SDbObj *pDb = NULL;*/ - /*SUserObj *pUser = NULL;*/ - SMDropStreamReq dropReq = {0}; - if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) { - ASSERT(0); - terrno = TSDB_CODE_INVALID_MSG; - return -1; + // iterate all stream obj + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) break; + // incr tick + int64_t currentTick = atomic_add_fetch_64(&pStream->currentTick, 1); + // if >= checkpointFreq, build msg TDMT_MND_STREAM_BEGIN_CHECKPOINT, put into write q + if (currentTick >= pStream->checkpointFreq) { + atomic_store_64(&pStream->currentTick, 0); + SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); + + pMsg->streamId = pStream->uid; + pMsg->checkpointId = tGenIdPI64(); + memcpy(pMsg->streamName, pStream->name, TSDB_STREAM_FNAME_LEN); + + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, + .pCont = pMsg, + .contLen = sizeof(SMStreamDoCheckpointMsg), + }; + + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + } } - pStream = mndAcquireStream(pMnode, dropReq.name); + return 0; +} - if (pStream == NULL) { - if (dropReq.igNotExists) { - mInfo("stream:%s, not exist, ignore not exist is set", 
dropReq.name); - sdbRelease(pMnode->pSdb, pStream); - return 0; - } else { - terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; - return -1; - } - } +static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, const SStreamTask *pTask, + SMStreamDoCheckpointMsg *pMsg) { + SStreamCheckpointSourceReq req = {0}; + req.checkpointId = pMsg->checkpointId; + req.nodeId = pTask->nodeId; + req.expireTime = -1; + req.streamId = pTask->streamId; + req.taskId = pTask->taskId; - if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { + int32_t code; + int32_t blen; + + tEncodeSize(tEncodeSStreamCheckpointSourceReq, &req, blen, code); + if (code < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "drop-stream"); - mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); - if (pTrans == NULL) { - mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); - sdbRelease(pMnode->pSdb, pStream); + int32_t tlen = sizeof(SMsgHead) + blen; + + void *buf = taosMemoryMalloc(tlen); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); - // drop all tasks - if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { - mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); - sdbRelease(pMnode->pSdb, pStream); - mndTransDrop(pTrans); + void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + tEncodeSStreamCheckpointSourceReq(&encoder, &req); + + SMsgHead *pMsgHead = (SMsgHead *)buf; + pMsgHead->contLen = htonl(tlen); + pMsgHead->vgId = htonl(pTask->nodeId); + + tEncoderClear(&encoder); + + *pBuf = buf; + *pLen = tlen; + + return 0; +} + +static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + + 
SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; + + SStreamObj *pStream = mndAcquireStream(pMnode, pMsg->streamName); + + if (pStream == NULL || pStream->uid != pMsg->streamId) { + mError("start checkpointing failed since stream %s not found", pMsg->streamName); return -1; } - // drop stream - if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { - sdbRelease(pMnode->pSdb, pStream); - mndTransDrop(pTrans); - return -1; + // build new transaction: + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "stream-checkpoint"); + if (pTrans == NULL) return -1; + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); + taosRLockLatch(&pStream->lock); + // 1. redo action: broadcast checkpoint source msg for all source vg + int32_t totLevel = taosArrayGetSize(pStream->tasks); + for (int32_t i = 0; i < totLevel; i++) { + SArray *pLevel = taosArrayGetP(pStream->tasks, i); + SStreamTask *pTask = taosArrayGetP(pLevel, 0); + if (pTask->taskLevel == TASK_LEVEL__SOURCE) { + int32_t sz = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < sz; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + ASSERT(pTask->nodeId > 0); + SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->nodeId); + if (pVgObj == NULL) { + ASSERT(0); + taosRUnLockLatch(&pStream->lock); + mndReleaseStream(pMnode, pStream); + mndTransDrop(pTrans); + return -1; + } + + void *buf; + int32_t tlen; + if (mndBuildStreamCheckpointSourceReq(&buf, &tlen, pTask, pMsg) < 0) { + taosRUnLockLatch(&pStream->lock); + mndReleaseStream(pMnode, pStream); + mndTransDrop(pTrans); + return -1; + } + + STransAction action = {0}; + action.epSet = mndGetVgroupEpset(pMnode, pVgObj); + action.pCont = buf; + action.contLen = tlen; + action.msgType = TDMT_VND_STREAM_CHECK_POINT_SOURCE; + + mndReleaseVgroup(pMnode, pVgObj); + + if (mndTransAppendRedoAction(pTrans, &action) != 0) { + taosMemoryFree(buf); + taosRUnLockLatch(&pStream->lock); + mndReleaseStream(pMnode, 
pStream); + mndTransDrop(pTrans); + return -1; + } + } + } } + // 2. reset tick + atomic_store_64(&pStream->currentTick, 0); + // 3. commit log: stream checkpoint info + taosRUnLockLatch(&pStream->lock); if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); - sdbRelease(pMnode->pSdb, pStream); + mError("failed to prepare trans rebalance since %s", terrstr()); mndTransDrop(pTrans); + mndReleaseStream(pMnode, pStream); return -1; } - sdbRelease(pMnode->pSdb, pStream); + mndReleaseStream(pMnode, pStream); mndTransDrop(pTrans); - return TSDB_CODE_ACTION_IN_PROGRESS; + return 0; } -#if 0 -static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq) { +static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SStreamObj *pStream = NULL; /*SDbObj *pDb = NULL;*/ /*SUserObj *pUser = NULL;*/ - SMRecoverStreamReq recoverReq = {0}; - if (tDeserializeSMRecoverStreamReq(pReq->pCont, pReq->contLen, &recoverReq) < 0) { + SMDropStreamReq dropReq = {0}; + if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) { ASSERT(0); terrno = TSDB_CODE_INVALID_MSG; return -1; } - pStream = mndAcquireStream(pMnode, recoverReq.name); + pStream = mndAcquireStream(pMnode, dropReq.name); if (pStream == NULL) { - if (recoverReq.igNotExists) { - mInfo("stream:%s, not exist, ignore not exist is set", recoverReq.name); + if (dropReq.igNotExists) { + mInfo("stream:%s, not exist, ignore not exist is set", dropReq.name); sdbRelease(pMnode->pSdb, pStream); return 0; } else { @@ -779,39 +875,42 @@ static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq) { return -1; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_NOTHING, pReq); + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "drop-stream"); + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (pTrans == NULL) { - mError("stream:%s, failed to 
recover since %s", recoverReq.name, terrstr()); + mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); return -1; } - mInfo("trans:%d, used to drop stream:%s", pTrans->id, recoverReq.name); + mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); - // broadcast to recover all tasks - if (mndRecoverStreamTasks(pMnode, pTrans, pStream) < 0) { - mError("stream:%s, failed to recover task since %s", recoverReq.name, terrstr()); + // drop all tasks + if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { + mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); return -1; } - // update stream status - if (mndSetStreamRecover(pMnode, pTrans, pStream) < 0) { + // drop stream + if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); return -1; } if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare recover stream trans since %s", pTrans->id, terrstr()); + mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); return -1; } sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); return TSDB_CODE_ACTION_IN_PROGRESS; } -#endif int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { SSdb *pSdb = pMnode->pSdb; @@ -847,13 +946,6 @@ int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { } } -#if 0 - if (mndSetDropOffsetStreamLogs(pMnode, pTrans, pStream) < 0) { - sdbRelease(pSdb, pStream); - goto END; - } -#endif - sdbRelease(pSdb, pStream); } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 3cf5e17cd3606b46f50667e15a27a8ade843a867..55e073a8a4a4e10df6410d5e77dd975b1bde3535 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ 
b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -440,9 +440,9 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR } static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOutputObj *pOutput) { - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pMsg, "persist-reb"); - mndTransSetDbName(pTrans, pOutput->pSub->dbName, NULL); + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pMsg, "tmq-reb"); if (pTrans == NULL) return -1; + mndTransSetDbName(pTrans, pOutput->pSub->dbName, NULL); // make txn: // 1. redo action: action to all vg @@ -523,28 +523,6 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); } -#if 0 - if (consumerNum) { - char topic[TSDB_TOPIC_FNAME_LEN]; - char cgroup[TSDB_CGROUP_LEN]; - mndSplitSubscribeKey(pOutput->pSub->key, topic, cgroup, true); - SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic); - if (pTopic) { - // TODO make topic complete - SMqTopicObj topicObj = {0}; - memcpy(&topicObj, pTopic, sizeof(SMqTopicObj)); - topicObj.refConsumerCnt = pTopic->refConsumerCnt - consumerNum; - // TODO is that correct? - pTopic->refConsumerCnt = topicObj.refConsumerCnt; - mInfo("subscribe topic %s unref %d consumer cgroup %s, refcnt %d", pTopic->name, consumerNum, cgroup, - topicObj.refConsumerCnt); - if (mndSetTopicCommitLogs(pMnode, pTrans, &topicObj) != 0) { - ASSERT(0); - goto REB_FAIL; - } - } - } -#endif // 4. 
TODO commit log: modification log diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index aa55204ae5ed8c511684ca10b5eeb2067be5bad5..b133226ed39bb5c20ed96b56d95881d009e1e872 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -168,7 +168,7 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; - streamMetaRemoveTask1(pSnode->pMeta, pReq->taskId); + streamMetaRemoveTask(pSnode->pMeta, pReq->taskId); return 0; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 5066223134b1ccfac802eb93cae08b286207a999..04cf8bacc029a4d9e033410d42c67ebba64534ba 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1425,7 +1425,7 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; - streamMetaRemoveTask1(pTq->pStreamMeta, pReq->taskId); + streamMetaRemoveTask(pTq->pStreamMeta, pReq->taskId); return 0; } diff --git a/source/libs/stream/inc/streamInc.h b/source/libs/stream/inc/streamInc.h index 5ff49502df60401e1f08ea7aeeaf37f66e717e19..66496f11f8b2da8ad46d91637efdeb1444f26319 100644 --- a/source/libs/stream/inc/streamInc.h +++ b/source/libs/stream/inc/streamInc.h @@ -17,7 +17,6 @@ #define _STREAM_INC_H_ #include "executor.h" -#include "tref.h" #include "tstream.h" #ifdef __cplusplus @@ -25,9 +24,8 @@ extern "C" { #endif typedef struct { - int8_t inited; - int32_t refPool; - void* timer; + int8_t inited; + void* timer; } SStreamGlobalEnv; static SStreamGlobalEnv streamEnv; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c new file mode 100644 index 
0000000000000000000000000000000000000000..efd19074da1b2f51e1217b3f7ab359f2b4a33c95 --- /dev/null +++ b/source/libs/stream/src/streamCheckpoint.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "streamInc.h" + +int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->nodeId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->nodeId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; + if 
(tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->taskId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->nodeId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->nodeId) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->downstreamNodeId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->upstreamTaskId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->upstreamNodeId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1; + if (tEncodeI8(pEncoder, pReq->taskLevel) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->downstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; + if 
(tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; + if (tDecodeI8(pDecoder, &pReq->taskLevel) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp) { /* Serializes pRsp into pEncoder; returns pEncoder->pos on success, -1 if any encode step fails. Each field must be written with the same width tDecodeSStreamCheckpointRsp uses to read it, otherwise every later field is decoded from the wrong offset. */ + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->upstreamTaskId) < 0) return -1; /* was tEncodeI64; the decoder reads this field with tDecodeI32 */ + if (tEncodeI32(pEncoder, pRsp->upstreamNodeId) < 0) return -1; /* was tEncodeI64; the decoder reads this field with tDecodeI32 */ + if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1; + if (tEncodeI8(pEncoder, pRsp->taskLevel) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp) { /* Decodes the fields written by tEncodeSStreamCheckpointRsp, in the same order, into pRsp; returns 0 on success, -1 if any decode step fails. */ + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->upstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->upstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1; + if (tDecodeI8(pDecoder, &pRsp->taskLevel) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) { /* On the first request of a checkpoint (checkpointingId == 0) records checkpointId and sets the alignment counter to the number of entries in pTask->childEpInfo. childId is not used. NOTE(review): this test-and-set is not atomic - confirm that requests for one task are serialized. */ + if (pTask->checkpointingId == 0) { + pTask->checkpointingId = checkpointId; + pTask->checkpointAlignCnt =
taosArrayGetSize(pTask->childEpInfo); + } + + ASSERT(pTask->checkpointingId == checkpointId); + + return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1); /* value of the counter after the decrement; 0 means the last expected request has arrived */ +} + +static int32_t streamDoCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) { /* Commits the task state through streamStateCommit; the remaining steps are placeholders. Returns 0 on success, -1 if the commit fails. pMeta and checkpointId are not used yet. */ + // commit tdb state + if (streamStateCommit(pTask->pState) < 0) return -1; /* do not report success for a failed commit; assumes streamStateCommit signals failure with a negative return - confirm against its declaration */ + // commit non-tdb state + // copy and save new state + // report to mnode + // send checkpoint req to downstream + return 0; +} + +static int32_t streamDoSourceCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) { /* Placeholder: performs no work yet and always returns 0. */ + // ref wal + // set status checkpointing + // do checkpoint + return 0; +} +int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) { /* Runs streamDoSourceCheckpoint for pReq->checkpointId; returns 0 on success, -1 if it reports a negative code. */ + int32_t code; + int64_t checkpointId = pReq->checkpointId; + + code = streamDoSourceCheckpoint(pMeta, pTask, checkpointId); + if (code < 0) { + // rsp error + return -1; + } + + return 0; +} + +int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq) { /* When pTask->childEpInfo is non-empty, returns 0 without checkpointing while streamAlignCheckpoint still reports a positive remaining count; once it reaches 0 (or childEpInfo is empty) runs streamDoCheckpoint. Returns 0 on success, -1 on failure. */ + int32_t code; + int64_t checkpointId = pReq->checkpointId; + int32_t childId = pReq->childId; + + if (taosArrayGetSize(pTask->childEpInfo) > 0) { + code = streamAlignCheckpoint(pTask, checkpointId, childId); + if (code > 0) { + return 0; + } + if (code < 0) { + ASSERT(0); + return -1; + } + } + + code = streamDoCheckpoint(pMeta, pTask, checkpointId); + if (code < 0) { + // rsp error + return -1; + } + + // send rsp to all children + + return 0; +} + +int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp) { /* Placeholder: performs no work yet and always returns 0. */ + // recover step2, scan from wal + // unref wal + // set status normal + return 0; +} diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index a864814a7472ac347f39263ca1f32ec5a29d5a51..fc6e1668ba8f7e8a7def4b86314da40ff184340a 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -202,7 +202,7 @@ void
streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { } } -void streamMetaRemoveTask1(SStreamMeta* pMeta, int32_t taskId) { +void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (ppTask) { SStreamTask* pTask = *ppTask; @@ -219,35 +219,6 @@ void streamMetaRemoveTask1(SStreamMeta* pMeta, int32_t taskId) { } } -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (ppTask) { - SStreamTask* pTask = *ppTask; - taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); - atomic_store_8(&pTask->taskStatus, TASK_STATUS__DROPPING); - - if (tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), &pMeta->txn) < 0) { - /*return -1;*/ - } - - if (pTask->triggerParam != 0) { - taosTmrStop(pTask->timer); - } - - while (1) { - int8_t schedStatus = - atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__DROPPING); - if (schedStatus != TASK_SCHED_STATUS__ACTIVE) { - tFreeSStreamTask(pTask); - break; - } - taosMsleep(10); - } - } - - return 0; -} - int32_t streamMetaBegin(SStreamMeta* pMeta) { if (tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 7eee95a580cfc9ee01767998fcc9aabe418682ba..6889a870d19b2ec01f2f77ab57d8e70866599053 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -325,46 +325,3 @@ int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishR tEndDecode(pDecoder); return 0; } - -int32_t tEncodeSStreamCheckpointInfo(SEncoder* pEncoder, const SStreamCheckpointInfo* pCheckpoint) { - if (tEncodeI32(pEncoder, pCheckpoint->srcNodeId) < 0) return -1; - if (tEncodeI32(pEncoder, 
pCheckpoint->srcChildId) < 0) return -1; - if (tEncodeI64(pEncoder, pCheckpoint->stateProcessedVer) < 0) return -1; - return 0; -} - -int32_t tDecodeSStreamCheckpointInfo(SDecoder* pDecoder, SStreamCheckpointInfo* pCheckpoint) { - if (tDecodeI32(pDecoder, &pCheckpoint->srcNodeId) < 0) return -1; - if (tDecodeI32(pDecoder, &pCheckpoint->srcChildId) < 0) return -1; - if (tDecodeI64(pDecoder, &pCheckpoint->stateProcessedVer) < 0) return -1; - return 0; -} - -int32_t tEncodeSStreamMultiVgCheckpointInfo(SEncoder* pEncoder, const SStreamMultiVgCheckpointInfo* pCheckpoint) { - if (tEncodeI64(pEncoder, pCheckpoint->streamId) < 0) return -1; - if (tEncodeI64(pEncoder, pCheckpoint->checkTs) < 0) return -1; - if (tEncodeI32(pEncoder, pCheckpoint->checkpointId) < 0) return -1; - if (tEncodeI32(pEncoder, pCheckpoint->taskId) < 0) return -1; - int32_t sz = taosArrayGetSize(pCheckpoint->checkpointVer); - if (tEncodeI32(pEncoder, sz) < 0) return -1; - for (int32_t i = 0; i < sz; i++) { - SStreamCheckpointInfo* pOneVgCkpoint = taosArrayGet(pCheckpoint->checkpointVer, i); - if (tEncodeSStreamCheckpointInfo(pEncoder, pOneVgCkpoint) < 0) return -1; - } - return 0; -} - -int32_t tDecodeSStreamMultiVgCheckpointInfo(SDecoder* pDecoder, SStreamMultiVgCheckpointInfo* pCheckpoint) { - if (tDecodeI64(pDecoder, &pCheckpoint->streamId) < 0) return -1; - if (tDecodeI64(pDecoder, &pCheckpoint->checkTs) < 0) return -1; - if (tDecodeI32(pDecoder, &pCheckpoint->checkpointId) < 0) return -1; - if (tDecodeI32(pDecoder, &pCheckpoint->taskId) < 0) return -1; - int32_t sz; - if (tDecodeI32(pDecoder, &sz) < 0) return -1; - for (int32_t i = 0; i < sz; i++) { - SStreamCheckpointInfo oneVgCheckpoint; - if (tDecodeSStreamCheckpointInfo(pDecoder, &oneVgCheckpoint) < 0) return -1; - taosArrayPush(pCheckpoint->checkpointVer, &oneVgCheckpoint); - } - return 0; -} diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 
03306af925318866539cd6c300975b7ae95fee67..f0be976402a95bdbcc0bcfe1de177516851e840d 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -74,7 +74,7 @@ SyncTerm syncLogReplMgrGetPrevLogTerm(SSyncLogReplMgr* pMgr, SSyncNode* pNode, S SyncTerm prevLogTerm = -1; terrno = TSDB_CODE_SUCCESS; - if (prevIndex == -1) return 0; + if (prevIndex == -1 && pNode->pLogStore->syncLogBeginIndex(pNode->pLogStore) == 0) return 0; if (prevIndex > pBuf->matchIndex) { terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; @@ -691,7 +691,6 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod if (pMsg->matchIndex < pNode->pLogBuf->matchIndex) { term = syncLogReplMgrGetPrevLogTerm(pMgr, pNode, index + 1); - if (term < 0 || (term != pMsg->lastMatchTerm && (index + 1 == firstVer || index == firstVer))) { ASSERT(term >= 0 || terrno == TSDB_CODE_WAL_LOG_NOT_EXIST); if (syncNodeStartSnapshot(pNode, &destId) < 0) { diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 479c1a5af7587b70cb9e1f8943d1c0af6790c137..bf9a6c005103c76a6e8201d4833870527d107f36 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -151,8 +151,8 @@ typedef struct { int64_t retryNextInterval; bool retryInit; int32_t retryStep; - - int8_t epsetRetryCnt; + int8_t epsetRetryCnt; + int32_t retryCode; int hThrdIdx; } STransConnCtx; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index fbcc1fb525c5cc02410bd3f88a8530aa820bd01b..e92b44f8c229f01d9214b924928de4311b123d3e 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1020,7 +1020,6 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { char tbuf[256] = {0}; EPSET_DEBUG_STR(&pCtx->epSet, tbuf); - tDebug("current epset %s", tbuf); if (!EPSET_IS_VALID(&pCtx->epSet)) { tError("invalid epset"); @@ -1500,34 +1499,46 @@ bool cliGenRetryRule(SCliConn* pConn, 
STransMsg* pResp, SCliMsg* pMsg) { pCtx->retryNextInterval = pCtx->retryMinInterval; pCtx->retryStep = 0; pCtx->retryInit = true; + pCtx->retryCode = TSDB_CODE_SUCCESS; } + if (-1 != pCtx->retryMaxTimeout && taosGetTimestampMs() - pCtx->retryInitTimestamp >= pCtx->retryMaxTimeout) { return false; } + // code, msgType + + // A: epset, leader, not self + // B: epset, not know leader + // C: no epset, leader but not service + bool noDelay = false; if (code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - tDebug("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); + tTrace("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); noDelay = cliResetEpset(pCtx, pResp, false); transFreeMsg(pResp->pCont); transUnrefCliHandle(pConn); } else if (code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_INTERNAL_ERROR || code == TSDB_CODE_SYN_PROPOSE_NOT_READY || code == TSDB_CODE_RPC_REDIRECT) { - tDebug("code str %s, contlen:%d 1", tstrerror(code), pResp->contLen); + tTrace("code str %s, contlen:%d 1", tstrerror(code), pResp->contLen); noDelay = cliResetEpset(pCtx, pResp, true); transFreeMsg(pResp->pCont); addConnToPool(pThrd->pool, pConn); } else if (code == TSDB_CODE_SYN_RESTORING) { - tDebug("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); + tTrace("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); noDelay = cliResetEpset(pCtx, pResp, false); addConnToPool(pThrd->pool, pConn); transFreeMsg(pResp->pCont); } else { - tDebug("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); + tTrace("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); noDelay = cliResetEpset(pCtx, pResp, false); addConnToPool(pThrd->pool, pConn); transFreeMsg(pResp->pCont); } + if (code != TSDB_CODE_RPC_BROKEN_LINK && code != TSDB_CODE_RPC_NETWORK_UNAVAIL && code != TSDB_CODE_SUCCESS) { + // remember the latest non-network error code seen during retries + pCtx->retryCode = code; + } if (noDelay == false) { pCtx->epsetRetryCnt = 1; @@ -1556,29 +1567,36 @@ int
cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { STrans* pTransInst = pThrd->pTransInst; if (pMsg == NULL || pMsg->ctx == NULL) { - tDebug("%s conn %p handle resp", pTransInst->label, pConn); + tTrace("%s conn %p handle resp", pTransInst->label, pConn); pTransInst->cfp(pTransInst->parent, pResp, NULL); return 0; } STransConnCtx* pCtx = pMsg->ctx; - int32_t code = pResp->code; bool retry = cliGenRetryRule(pConn, pResp, pMsg); if (retry == true) { return -1; } - STraceId* trace = &pResp->info.traceId; - bool hasEpSet = cliTryExtractEpSet(pResp, &pCtx->epSet); + if (pCtx->retryCode != TSDB_CODE_SUCCESS) { + int32_t code = pResp->code; + // return internal code app + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK) { + pResp->code = pCtx->retryCode; + } + } + + STraceId* trace = &pResp->info.traceId; + bool hasEpSet = cliTryExtractEpSet(pResp, &pCtx->epSet); if (hasEpSet) { char tbuf[256] = {0}; EPSET_DEBUG_STR(&pCtx->epSet, tbuf); - tGDebug("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn); + tGTrace("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn); } if (pCtx->pSem != NULL) { - tGDebug("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn); + tGTrace("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn); if (pCtx->pRsp == NULL) { tGTrace("%s conn %p(sync) failed to resp, ignore", CONN_GET_INST_LABEL(pConn), pConn); } else { @@ -1587,11 +1605,11 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { tsem_post(pCtx->pSem); pCtx->pRsp = NULL; } else { - tGDebug("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn); + tGTrace("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn); if (retry == false && hasEpSet == true) { pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet); } else { - if (!cliIsEpsetUpdated(code, pCtx)) { + if (!cliIsEpsetUpdated(pResp->code, pCtx)) { pTransInst->cfp(pTransInst->parent, pResp, NULL); } else { 
pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet); diff --git a/source/os/src/osSemaphore.c b/source/os/src/osSemaphore.c index bfce8b3151ffe051398d89ba5eada720b897c105..2f947d325263d45025a7cff835a715ac108674e7 100644 --- a/source/os/src/osSemaphore.c +++ b/source/os/src/osSemaphore.c @@ -75,20 +75,16 @@ int32_t tsem_wait(tsem_t* sem) { return ret; } -int32_t tsem_timewait(tsem_t* sem, int64_t milis) { - return 0; - /*return tsem_wait(sem);*/ -#if 0 +int32_t tsem_timewait(tsem_t* sem, int64_t ms) { struct timespec ts; - timespec_get(&ts); + taosClockGetTime(0, &ts); + ts.tv_nsec += ms * 1000000; ts.tv_sec += ts.tv_nsec / 1000000000; ts.tv_nsec %= 1000000000; - - /*GetSystemTimeAsFileTime(&ft_before);*/ - // errno = 0; - rc = sem_timedwait(sem, ts); - + int rc; + while ((rc = sem_timedwait(sem, &ts)) == -1 && errno == EINTR) continue; + return rc; /* This should have timed out */ // assert(errno == ETIMEDOUT); // assert(rc != 0); @@ -103,8 +99,6 @@ int32_t tsem_timewait(tsem_t* sem, int64_t milis) { // printf("time must advance during sem_timedwait."); // return 1; // } - return rc; -#endif } #elif defined(_TD_DARWIN_64)