提交 47877898 编写于 作者: H Haojun Liao

enh(stream): generate the checkpoint framework.

上级 61f35857
...@@ -48,6 +48,7 @@ enum { ...@@ -48,6 +48,7 @@ enum {
TASK_STATUS__HALT, // stream task will handle all data in the input queue, and then paused TASK_STATUS__HALT, // stream task will handle all data in the input queue, and then paused
TASK_STATUS__PAUSE, TASK_STATUS__PAUSE,
TASK_STATUS__CK, // stream task is in checkpoint status, no data are allowed to put into inputQ anymore TASK_STATUS__CK, // stream task is in checkpoint status, no data are allowed to put into inputQ anymore
TASK_STATUS__CK_READY,
}; };
enum { enum {
...@@ -310,9 +311,8 @@ struct SStreamTask { ...@@ -310,9 +311,8 @@ struct SStreamTask {
SStreamId historyTaskId; SStreamId historyTaskId;
SStreamId streamTaskId; SStreamId streamTaskId;
SArray* pUpstreamEpInfoList; // SArray<SStreamChildEpInfo*>, // children info SArray* pUpstreamEpInfoList; // SArray<SStreamChildEpInfo*>, // children info
int32_t nextCheckId;
SArray* checkpointInfo; // SArray<SStreamCheckpointInfo> SArray* checkpointInfo; // SArray<SStreamCheckpointInfo>
SArray* pRpcMsgList; // SArray<SRpcMsg*>
// output // output
union { union {
STaskDispatcherFixedEp fixedEpDispatcher; STaskDispatcherFixedEp fixedEpDispatcher;
...@@ -343,6 +343,7 @@ struct SStreamTask { ...@@ -343,6 +343,7 @@ struct SStreamTask {
int32_t refCnt; int32_t refCnt;
int64_t checkpointingId; int64_t checkpointingId;
int32_t checkpointAlignCnt; int32_t checkpointAlignCnt;
int32_t checkpointNotReadyTasks;
struct SStreamMeta* pMeta; struct SStreamMeta* pMeta;
SSHashObj* pNameMap; SSHashObj* pNameMap;
}; };
...@@ -366,7 +367,8 @@ typedef struct SStreamMeta { ...@@ -366,7 +367,8 @@ typedef struct SStreamMeta {
SHashObj* pTaskBackendUnique; SHashObj* pTaskBackendUnique;
TdThreadMutex backendMutex; TdThreadMutex backendMutex;
int64_t checkpointTs; // uint64_t checkpointId;
int32_t notCkptReadyTasks;
SArray* checkpointSaved; SArray* checkpointSaved;
SArray* checkpointInUse; SArray* checkpointInUse;
int32_t checkpointCap; int32_t checkpointCap;
...@@ -486,8 +488,8 @@ typedef struct { ...@@ -486,8 +488,8 @@ typedef struct {
int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq); int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq);
int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq); int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq);
int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp); int32_t tEncodeStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp);
int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp); int32_t tDecodeStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp);
typedef struct { typedef struct {
SMsgHead msgHead; SMsgHead msgHead;
...@@ -509,13 +511,13 @@ typedef struct { ...@@ -509,13 +511,13 @@ typedef struct {
int32_t upstreamTaskId; int32_t upstreamTaskId;
int32_t upstreamNodeId; int32_t upstreamNodeId;
int32_t childId; int32_t childId;
} SStreamCheckpointRsp; } SStreamTaskCheckpointRsp;
int32_t tEncodeStreamTaskCheckpointReq(SEncoder* pEncoder, const SStreamTaskCheckpointReq* pReq); int32_t tEncodeStreamCheckpointReq(SEncoder* pEncoder, const SStreamTaskCheckpointReq* pReq);
int32_t tDecodeStreamTaskCheckpointReq(SDecoder* pDecoder, SStreamTaskCheckpointReq* pReq); int32_t tDecodeStreamCheckpointReq(SDecoder* pDecoder, SStreamTaskCheckpointReq* pReq);
int32_t tEncodeStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp); int32_t tEncodeStreamCheckpointRsp(SEncoder* pEncoder, const SStreamTaskCheckpointRsp* pRsp);
int32_t tDecodeStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp); int32_t tDecodeStreamCheckpointRsp(SDecoder* pDecoder, SStreamTaskCheckpointRsp* pRsp);
typedef struct { typedef struct {
int64_t streamId; int64_t streamId;
...@@ -536,12 +538,6 @@ int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) ...@@ -536,12 +538,6 @@ int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq)
int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp); int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp);
int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp); int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp);
int32_t tEncodeSStreamTaskScanHistoryReq(SEncoder* pEncoder, const SStreamRecoverDownstreamReq* pReq);
int32_t tDecodeSStreamTaskScanHistoryReq(SDecoder* pDecoder, SStreamRecoverDownstreamReq* pReq);
int32_t tEncodeSStreamTaskRecoverRsp(SEncoder* pEncoder, const SStreamRecoverDownstreamRsp* pRsp);
int32_t tDecodeSStreamTaskRecoverRsp(SDecoder* pDecoder, SStreamRecoverDownstreamRsp* pRsp);
int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq); int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq);
int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq); int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq);
void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq); void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq);
...@@ -557,7 +553,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S ...@@ -557,7 +553,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S
int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code);
int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg); int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg);
// int32_t streamProcessRetrieveRsp(SStreamTask* pTask, SStreamRetrieveRsp* pRsp);
void streamTaskInputFail(SStreamTask* pTask); void streamTaskInputFail(SStreamTask* pTask);
int32_t streamTryExec(SStreamTask* pTask); int32_t streamTryExec(SStreamTask* pTask);
...@@ -627,7 +622,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver); ...@@ -627,7 +622,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver);
// checkpoint // checkpoint
int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq);
int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamTaskCheckpointReq* pReq); int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamTaskCheckpointReq* pReq);
int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp); int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamTaskCheckpointRsp* pRsp);
int32_t streamTaskReleaseState(SStreamTask* pTask); int32_t streamTaskReleaseState(SStreamTask* pTask);
int32_t streamTaskReloadState(SStreamTask* pTask); int32_t streamTaskReloadState(SStreamTask* pTask);
......
...@@ -80,6 +80,7 @@ SArray *smGetMsgHandles() { ...@@ -80,6 +80,7 @@ SArray *smGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
code = 0; code = 0;
_OVER: _OVER:
......
...@@ -750,7 +750,8 @@ SArray *vmGetMsgHandles() { ...@@ -750,7 +750,8 @@ SArray *vmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
......
...@@ -220,6 +220,8 @@ int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); ...@@ -220,6 +220,8 @@ int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg);
int tqUnregisterPushHandle(STQ* pTq, void* pHandle); int tqUnregisterPushHandle(STQ* pTq, void* pHandle);
int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed.
int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen); int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen);
int32_t tqProcessStreamCheckPointReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen);
int32_t tqProcessStreamCheckPointRsp(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen);
int32_t tqCheckStreamStatus(STQ* pTq); int32_t tqCheckStreamStatus(STQ* pTq);
int tqCommit(STQ*); int tqCommit(STQ*);
......
...@@ -217,55 +217,6 @@ void tqNotifyClose(STQ* pTq) { ...@@ -217,55 +217,6 @@ void tqNotifyClose(STQ* pTq) {
} }
} }
//static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch,
// int64_t consumerId, int32_t type) {
// int32_t len = 0;
// int32_t code = 0;
//
// if (type == TMQ_MSG_TYPE__POLL_DATA_RSP) {
// tEncodeSize(tEncodeMqDataRsp, pRsp, len, code);
// } else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
// tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code);
// }
//
// if (code < 0) {
// return -1;
// }
//
// int32_t tlen = sizeof(SMqRspHead) + len;
// void* buf = rpcMallocCont(tlen);
// if (buf == NULL) {
// return -1;
// }
//
// ((SMqRspHead*)buf)->mqMsgType = type;
// ((SMqRspHead*)buf)->epoch = epoch;
// ((SMqRspHead*)buf)->consumerId = consumerId;
//
// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));
//
// SEncoder encoder = {0};
// tEncoderInit(&encoder, abuf, len);
//
// if (type == TMQ_MSG_TYPE__POLL_DATA_RSP) {
// tEncodeMqDataRsp(&encoder, pRsp);
// } else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
// tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp);
// }
//
// tEncoderClear(&encoder);
//
// SRpcMsg rsp = {
// .info = *pRpcHandleInfo,
// .pCont = buf,
// .contLen = tlen,
// .code = 0,
// };
//
// tmsgSendRsp(&rsp);
// return 0;
//}
int32_t tqPushDataRsp(STqHandle* pHandle, int32_t vgId) { int32_t tqPushDataRsp(STqHandle* pHandle, int32_t vgId) {
SMqDataRsp dataRsp = {0}; SMqDataRsp dataRsp = {0};
dataRsp.head.consumerId = pHandle->consumerId; dataRsp.head.consumerId = pHandle->consumerId;
...@@ -948,6 +899,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { ...@@ -948,6 +899,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
} }
// todo extract method
SEncoder encoder; SEncoder encoder;
int32_t code; int32_t code;
int32_t len; int32_t len;
...@@ -973,7 +925,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { ...@@ -973,7 +925,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
} }
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) {
char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead); int32_t len = pMsg->contLen - sizeof(SMsgHead);
int32_t code; int32_t code;
...@@ -982,7 +934,6 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { ...@@ -982,7 +934,6 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) {
SDecoder decoder; SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)pReq, len); tDecoderInit(&decoder, (uint8_t*)pReq, len);
code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); code = tDecodeStreamTaskCheckRsp(&decoder, &rsp);
if (code < 0) { if (code < 0) {
tDecoderClear(&decoder); tDecoderClear(&decoder);
return -1; return -1;
...@@ -1295,26 +1246,27 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { ...@@ -1295,26 +1246,27 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
} }
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
if (pTask != NULL) { if (pTask == NULL) {
// even in halt status, the data in inputQ must be processed
int8_t status = pTask->status.taskStatus;
if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__HALT) {
tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr,
pTask->chkInfo.version);
streamProcessRunReq(pTask);
} else {
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId,
pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus);
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
tqStartStreamTasks(pTq);
return 0;
} else {
tqError("vgId:%d failed to found s-task, taskId:0x%x", vgId, taskId); tqError("vgId:%d failed to found s-task, taskId:0x%x", vgId, taskId);
return -1; return -1;
} }
// even in halt status, the data in inputQ must be processed
int8_t status = pTask->status.taskStatus;
if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__HALT) {
tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr,
pTask->chkInfo.version);
streamProcessRunReq(pTask);
} else {
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId,
pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus);
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
tqStartStreamTasks(pTq);
return 0;
} }
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) {
...@@ -1554,16 +1506,23 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, int64_t sversion, char* pMs ...@@ -1554,16 +1506,23 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, int64_t sversion, char* pMs
if (tDecodeStreamCheckpointSourceReq(&decoder, &req) < 0) { if (tDecodeStreamCheckpointSourceReq(&decoder, &req) < 0) {
code = TSDB_CODE_MSG_DECODE_ERROR; code = TSDB_CODE_MSG_DECODE_ERROR;
tDecoderClear(&decoder); tDecoderClear(&decoder);
tqError("vgId:%d failed to decode checkpoint source msg, code:%s", vgId, tstrerror(code));
goto FAIL; goto FAIL;
} }
tDecoderClear(&decoder); tDecoderClear(&decoder);
// todo handle this bug: task not in ready state.
SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.taskId); SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.taskId);
if (pTask == NULL) { if (pTask == NULL) {
tqError("vgId:%d failed to find s-task:0x%x , it may have been destroyed already", vgId, req.taskId); tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId, req.taskId);
goto FAIL; goto FAIL;
} }
// backup the rpchandle for rsp
SRpcMsg* pRpcMsg = taosMemoryCalloc(1, sizeof(SRpcMsg));
memcpy(pRpcMsg, (SRpcMsg*)pMsg, sizeof(SRpcMsg));
taosArrayPush(pTask->pRpcMsgList, &pRpcMsg);
streamProcessCheckpointSourceReq(pMeta, pTask, &req); streamProcessCheckpointSourceReq(pMeta, pTask, &req);
streamMetaReleaseTask(pMeta, pTask); streamMetaReleaseTask(pMeta, pTask);
return code; return code;
...@@ -1571,3 +1530,70 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, int64_t sversion, char* pMs ...@@ -1571,3 +1530,70 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, int64_t sversion, char* pMs
FAIL: FAIL:
return code; return code;
} }
int32_t tqProcessStreamCheckPointReq(STQ* pTq, int64_t sversion, char* pMsg, int32_t msgLen) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
char* msg = POINTER_SHIFT(pMsg, sizeof(SMsgHead));
int32_t len = msgLen - sizeof(SMsgHead);
int32_t code = 0;
SStreamTaskCheckpointReq req= {0};
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, len);
if (tDecodeStreamCheckpointReq(&decoder, &req) < 0) {
code = TSDB_CODE_MSG_DECODE_ERROR;
tDecoderClear(&decoder);
goto FAIL;
}
tDecoderClear(&decoder);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.downstreamTaskId);
if (pTask == NULL) {
tqError("vgId:%d failed to find s-task:0x%x , it may have been destroyed already", vgId, req.downstreamTaskId);
goto FAIL;
}
streamProcessCheckpointReq(pMeta, pTask, &req);
streamMetaReleaseTask(pMeta, pTask);
return code;
FAIL:
return code;
}
// downstream task has complete the stream task checkpoint procedure
int32_t tqProcessStreamCheckPointRsp(STQ* pTq, int64_t sversion, char* pMsg, int32_t msgLen) {
// if this task is an agg task, rsp this message to upstream directly.
// if this task is an source task, send source rsp to mnode
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
char* msg = POINTER_SHIFT(pMsg, sizeof(SMsgHead));
int32_t len = msgLen - sizeof(SMsgHead);
int32_t code = 0;
SStreamTaskCheckpointRsp req= {0};
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, len);
if (tDecodeStreamCheckpointRsp(&decoder, &req) < 0) {
code = TSDB_CODE_MSG_DECODE_ERROR;
tDecoderClear(&decoder);
goto FAIL;
}
tDecoderClear(&decoder);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.downstreamTaskId);
if (pTask == NULL) {
tqError("vgId:%d failed to find s-task:0x%x , it may have been destroyed already", vgId, req.downstreamTaskId);
goto FAIL;
}
streamProcessCheckpointRsp(pMeta, pTask, &req);
streamMetaReleaseTask(pMeta, pTask);
return code;
FAIL:
return code;
}
...@@ -503,6 +503,17 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg ...@@ -503,6 +503,17 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
goto _err; goto _err;
} }
} break; } break;
case TDMT_STREAM_TASK_CHECKPOINT: {
if (tqProcessStreamCheckPointReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) {
goto _err;
}
} break;
case TDMT_STREAM_TASK_CHECKPOINT_RSP: {
if (tqProcessStreamCheckPointRsp(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) {
goto _err;
}
} break;
case TDMT_VND_ALTER_CONFIRM: case TDMT_VND_ALTER_CONFIRM:
needCommit = pVnode->config.hashChange; needCommit = pVnode->config.hashChange;
if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) { if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) {
......
...@@ -47,7 +47,7 @@ typedef struct { ...@@ -47,7 +47,7 @@ typedef struct {
void* streamBackendInit(const char* path); void* streamBackendInit(const char* path);
void streamBackendCleanup(void* arg); void streamBackendCleanup(void* arg);
void streamBackendHandleCleanup(void* arg); void streamBackendHandleCleanup(void* arg);
int32_t streamBackendDoCheckpoint(void* pMeta, const char* path); int32_t streamBackendDoCheckpoint(void* pMeta, uint64_t checkpointId);
SListNode* streamBackendAddCompare(void* backend, void* arg); SListNode* streamBackendAddCompare(void* backend, void* arg);
void streamBackendDelCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg);
......
...@@ -32,11 +32,13 @@ typedef struct { ...@@ -32,11 +32,13 @@ typedef struct {
} SStreamGlobalEnv; } SStreamGlobalEnv;
extern SStreamGlobalEnv streamEnv; extern SStreamGlobalEnv streamEnv;
extern int32_t streamBackendId;
extern int32_t streamBackendCfWrapperId;
void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration); void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration);
int32_t streamDispatchStreamBlock(SStreamTask* pTask); int32_t streamDispatchStreamBlock(SStreamTask* pTask);
SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg);
SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize, SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize,
SArray* pRes); SArray* pRes);
void destroyStreamDataBlock(SStreamDataBlock* pBlock); void destroyStreamDataBlock(SStreamDataBlock* pBlock);
...@@ -49,15 +51,12 @@ int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* p ...@@ -49,15 +51,12 @@ int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* p
int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData); int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData);
int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet);
int32_t streamDispatchCheckpointMsg(SStreamTask* pTask, const SStreamTaskCheckpointReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamDispatchCheckpointMsg(SStreamTask* pTask, const SStreamTaskCheckpointReq* pReq, int32_t nodeId, SEpSet* pEpSet);
int32_t streamTaskSendCheckpointRsp(SStreamTask* pTask, int32_t vgId);
int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask, int32_t vgId);
int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, const char* id);
SEpSet* pEpSet);
SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem);
extern int32_t streamBackendId;
extern int32_t streamBackendCfWrapperId;
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
...@@ -146,7 +146,7 @@ int32_t streamSchedExec(SStreamTask* pTask) { ...@@ -146,7 +146,7 @@ int32_t streamSchedExec(SStreamTask* pTask) {
int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) { int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) {
int8_t status = 0; int8_t status = 0;
SStreamDataBlock* pBlock = createStreamDataFromDispatchMsg(pReq, STREAM_INPUT__DATA_BLOCK, pReq->dataSrcVgId); SStreamDataBlock* pBlock = createStreamBlockFromDispatchMsg(pReq, STREAM_INPUT__DATA_BLOCK, pReq->dataSrcVgId);
if (pBlock == NULL) { if (pBlock == NULL) {
streamTaskInputFail(pTask); streamTaskInputFail(pTask);
status = TASK_INPUT_STATUS__FAILED; status = TASK_INPUT_STATUS__FAILED;
......
...@@ -401,20 +401,29 @@ int32_t delObsoleteCheckpoint(void* arg, const char* path) { ...@@ -401,20 +401,29 @@ int32_t delObsoleteCheckpoint(void* arg, const char* path) {
taosArrayDestroy(checkpointDel); taosArrayDestroy(checkpointDel);
return 0; return 0;
} }
int32_t streamBackendDoCheckpoint(void* arg, const char* path) {
int32_t streamBackendDoCheckpoint(void* arg, uint64_t checkpointId) {
SStreamMeta* pMeta = arg; SStreamMeta* pMeta = arg;
int64_t backendRid = pMeta->streamBackendRid; int64_t backendRid = pMeta->streamBackendRid;
int64_t checkpointId = pMeta->checkpointTs;
int64_t st = taosGetTimestampMs(); int64_t st = taosGetTimestampMs();
int32_t code = -1; int32_t code = -1;
SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid);
char path[256] = {0};
sprintf(path, "%s/%s", pMeta->path, "checkpoints");
code = taosMulModeMkDir(path, 0755);
if (code != 0) {
qError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code));
return code;
}
char checkpointDir[256] = {0}; char checkpointDir[256] = {0};
sprintf(checkpointDir, "%s/checkpoint_%" PRId64 "", path, checkpointId); snprintf(checkpointDir, tListLen(checkpointDir),"%s/checkpoint_%" PRIu64, path, checkpointId);
SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid);
if (pHandle == NULL) { if (pHandle == NULL) {
return -1; return -1;
} }
qDebug("stream backend:%p start to do checkpoint at:%s ", pHandle, path); qDebug("stream backend:%p start to do checkpoint at:%s ", pHandle, path);
if (pHandle->db != NULL) { if (pHandle->db != NULL) {
char* err = NULL; char* err = NULL;
...@@ -442,10 +451,12 @@ int32_t streamBackendDoCheckpoint(void* arg, const char* path) { ...@@ -442,10 +451,12 @@ int32_t streamBackendDoCheckpoint(void* arg, const char* path) {
taosWUnLockLatch(&pMeta->checkpointDirLock); taosWUnLockLatch(&pMeta->checkpointDirLock);
delObsoleteCheckpoint(arg, path); delObsoleteCheckpoint(arg, path);
_ERROR: _ERROR:
taosReleaseRef(streamBackendId, backendRid); taosReleaseRef(streamBackendId, backendRid);
return code; return code;
} }
SListNode* streamBackendAddCompare(void* backend, void* arg) { SListNode* streamBackendAddCompare(void* backend, void* arg) {
SBackendWrapper* pHandle = (SBackendWrapper*)backend; SBackendWrapper* pHandle = (SBackendWrapper*)backend;
SListNode* node = NULL; SListNode* node = NULL;
......
...@@ -37,7 +37,7 @@ int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSo ...@@ -37,7 +37,7 @@ int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSo
return 0; return 0;
} }
int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) { int32_t tEncodeStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) {
if (tStartEncode(pEncoder) < 0) return -1; if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1;
...@@ -48,7 +48,7 @@ int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheck ...@@ -48,7 +48,7 @@ int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheck
return pEncoder->pos; return pEncoder->pos;
} }
int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) { int32_t tDecodeStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) {
if (tStartDecode(pDecoder) < 0) return -1; if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1;
...@@ -59,7 +59,7 @@ int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointS ...@@ -59,7 +59,7 @@ int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointS
return 0; return 0;
} }
int32_t tEncodeStreamTaskCheckpointReq(SEncoder* pEncoder, const SStreamTaskCheckpointReq* pReq) { int32_t tEncodeStreamCheckpointReq(SEncoder* pEncoder, const SStreamTaskCheckpointReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1; if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1;
...@@ -72,7 +72,7 @@ int32_t tEncodeStreamTaskCheckpointReq(SEncoder* pEncoder, const SStreamTaskChec ...@@ -72,7 +72,7 @@ int32_t tEncodeStreamTaskCheckpointReq(SEncoder* pEncoder, const SStreamTaskChec
return pEncoder->pos; return pEncoder->pos;
} }
int32_t tDecodeStreamTaskCheckpointReq(SDecoder* pDecoder, SStreamTaskCheckpointReq* pReq) { int32_t tDecodeStreamCheckpointReq(SDecoder* pDecoder, SStreamTaskCheckpointReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1; if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1;
...@@ -85,7 +85,7 @@ int32_t tDecodeStreamTaskCheckpointReq(SDecoder* pDecoder, SStreamTaskCheckpoint ...@@ -85,7 +85,7 @@ int32_t tDecodeStreamTaskCheckpointReq(SDecoder* pDecoder, SStreamTaskCheckpoint
return 0; return 0;
} }
int32_t tEncodeStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp) { int32_t tEncodeStreamCheckpointRsp(SEncoder* pEncoder, const SStreamTaskCheckpointRsp* pRsp) {
if (tStartEncode(pEncoder) < 0) return -1; if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1;
...@@ -98,7 +98,7 @@ int32_t tEncodeStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRs ...@@ -98,7 +98,7 @@ int32_t tEncodeStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRs
return pEncoder->pos; return pEncoder->pos;
} }
int32_t tDecodeStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp) { int32_t tDecodeStreamCheckpointRsp(SDecoder* pDecoder, SStreamTaskCheckpointRsp* pRsp) {
if (tStartDecode(pDecoder) < 0) return -1; if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1;
...@@ -112,13 +112,14 @@ int32_t tDecodeStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRs ...@@ -112,13 +112,14 @@ int32_t tDecodeStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRs
} }
static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) { static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) {
if (pTask->checkpointingId == 0) { int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList);
int64_t old = atomic_val_compare_exchange_64(&pTask->checkpointingId, 0, num);
if (old == 0) {
qDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num);
pTask->checkpointingId = checkpointId; pTask->checkpointingId = checkpointId;
pTask->checkpointAlignCnt = taosArrayGetSize(pTask->pUpstreamEpInfoList);
} }
ASSERT(pTask->checkpointingId == checkpointId); ASSERT(pTask->checkpointingId == checkpointId);
return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1); return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1);
} }
...@@ -159,77 +160,49 @@ static int32_t streamTaskDispatchCheckpointMsg(SStreamTask* pTask, uint64_t chec ...@@ -159,77 +160,49 @@ static int32_t streamTaskDispatchCheckpointMsg(SStreamTask* pTask, uint64_t chec
pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, i); pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, i);
streamDispatchCheckpointMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); streamDispatchCheckpointMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
} }
} else {
qDebug("s-task:%s (vgId:%d) sink task set to be ready for checkpointing", pTask->id.idStr, pTask->info.nodeId);
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SINK);
streamTaskLaunchScanHistory(pTask);
} }
return 0; return 0;
} }
// set status check pointing static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask) {
// do checkpoint SStreamCheckpoint* pChkpoint = taosAllocateQitem(sizeof(SStreamCheckpoint), DEF_QITEM, sizeof(SSDataBlock));
static int32_t streamDoSourceCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, uint64_t checkpointId) { if (pChkpoint == NULL) {
int code = 0; return TSDB_CODE_OUT_OF_MEMORY;
char buf[256] = {0};
int64_t ts = taosGetTimestampMs();
sprintf(buf, "%s/%s", pMeta->path, "checkpoints");
code = taosMulModeMkDir(buf, 0755);
if (code != 0) {
qError("failed to prepare checkpoint %s, checkpointId:%" PRIu64 ", reason:%s", buf, checkpointId, tstrerror(code));
return code;
} }
pMeta->checkpointTs = ts; pChkpoint->type = STREAM_INPUT__CHECKPOINT;
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); pChkpoint->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock));
if (pChkpoint->pBlock == NULL) {
// 1. set task status to be prepared for check point taosFreeQitem(pChkpoint);
pTask->status.taskStatus = TASK_STATUS__CK; return TSDB_CODE_OUT_OF_MEMORY;
// 2. put the checkpoint data block into the inputQ, to enable the local status to be flushed to storage backend
{
SStreamCheckpoint* pChkpoint = taosAllocateQitem(sizeof(SStreamCheckpoint), DEF_QITEM, sizeof(SSDataBlock));
if (pChkpoint == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
pChkpoint->type = STREAM_INPUT__CHECKPOINT;
pChkpoint->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock));
if (pChkpoint->pBlock == NULL) {
taosFreeQitem(pChkpoint);
return TSDB_CODE_OUT_OF_MEMORY;
}
pChkpoint->pBlock->info.type = STREAM_CHECKPOINT;
if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pChkpoint) < 0) {
taosFreeQitem(pChkpoint);
return TSDB_CODE_OUT_OF_MEMORY;
}
streamSchedExec(pTask);
} }
// 2. dispatch checkpoint msg to downstream task pChkpoint->pBlock->info.type = STREAM_CHECKPOINT;
streamTaskDispatchCheckpointMsg(pTask, checkpointId); if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pChkpoint) < 0) {
taosFreeQitem(pChkpoint);
return TSDB_CODE_OUT_OF_MEMORY;
}
// code = streamBackendDoCheckpoint((void*)pMeta, buf); streamSchedExec(pTask);
return code; return TSDB_CODE_SUCCESS;
} }
int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) { int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) {
int32_t code = 0; int32_t code = 0;
int64_t checkpointId = pReq->checkpointId; int64_t checkpointId = pReq->checkpointId;
code = streamDoSourceCheckpoint(pMeta, pTask, checkpointId); qDebug("s-task:%s level:%d receive the checkpoint msg id:%" PRId64 " from mnode", pTask->id.idStr,
if (code < 0) { pTask->info.taskLevel, checkpointId);
// rsp error ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE);
return -1;
}
return 0; // 1. set task status to be prepared for check point, no data are allowed to put into inputQ.
pTask->status.taskStatus = TASK_STATUS__CK;
pTask->checkpointNotReadyTasks = 1;
// 2. let's dispatch checkpoint msg to downstream task directly and do nothing else.
streamTaskDispatchCheckpointMsg(pTask, checkpointId);
return code;
} }
int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamTaskCheckpointReq* pReq) { int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamTaskCheckpointReq* pReq) {
...@@ -237,31 +210,55 @@ int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStre ...@@ -237,31 +210,55 @@ int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStre
int64_t checkpointId = pReq->checkpointId; int64_t checkpointId = pReq->checkpointId;
int32_t childId = pReq->childId; int32_t childId = pReq->childId;
if (taosArrayGetSize(pTask->pUpstreamEpInfoList) > 0) { // set the task status
code = streamAlignCheckpoint(pTask, checkpointId, childId); pTask->status.taskStatus = TASK_STATUS__CK;
if (code > 0) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG || pTask->info.taskLevel == TASK_LEVEL__SINK);
if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
qDebug("s-task:%s sink task set to checkpoint ready", pTask->id.idStr);
appendCheckpointIntoInputQ(pTask);
streamSchedExec(pTask);
} else {
// todo close the inputQ for data from childId, which means data from childId are not allowed to put into intpuQ
// anymore
ASSERT(taosArrayGetSize(pTask->pUpstreamEpInfoList) > 0);
// there are still some upstream tasks not send checkpoint request
int32_t notReady = streamAlignCheckpoint(pTask, checkpointId, childId);
if (notReady > 0) {
int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList);
qDebug("s-task:%s %d upstream tasks not send checkpoint info yet, total:%d", pTask->id.idStr, notReady, num);
return 0; return 0;
} }
if (code < 0) {
ASSERT(0);
return -1;
}
}
// code = streamDoCheckpoint(pMeta, pTask, checkpointId); qDebug("s-task:%s all upstream send checkpoint msg now, dispatch checkpoint msg to downstream", pTask->id.idStr);
if (code < 0) { pTask->checkpointNotReadyTasks = (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH)
// rsp error ? 1
return -1; : taosArrayGetSize(pTask->shuffleDispatcher.dbInfo.pVgroupInfos);
}
// send rsp to all children // if all upstreams are ready for generating checkpoint, set the status to be TASK_STATUS__CK_READY
// 2. dispatch check point msg to all downstream tasks
streamTaskDispatchCheckpointMsg(pTask, checkpointId);
}
return 0; return 0;
} }
int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp) { /**
// recover step2, scan from wal * All down stream tasks have successfully completed the check point task.
// unref wal * Current stream task is allowed to start to do checkpoint things in ASYNC model.
// set status normal */
int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamTaskCheckpointRsp* pRsp) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG);
// only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task
int32_t notReady = atomic_sub_fetch_32(&pTask->checkpointNotReadyTasks, 1);
if (notReady == 0) {
qDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for this task",
pTask->id.idStr);
appendCheckpointIntoInputQ(pTask);
streamSchedExec(pTask);
}
return 0; return 0;
} }
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "streamInt.h" #include "streamInt.h"
SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg) { SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg) {
SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, pReq->totalLen); SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, pReq->totalLen);
if (pData == NULL) { if (pData == NULL) {
return NULL; return NULL;
......
...@@ -16,8 +16,6 @@ ...@@ -16,8 +16,6 @@
#include "streamInt.h" #include "streamInt.h"
// maximum allowed processed block batches. One block may include several submit blocks // maximum allowed processed block batches. One block may include several submit blocks
#define MAX_STREAM_EXEC_BATCH_NUM 32
#define MIN_STREAM_EXEC_BATCH_NUM 4
#define MAX_STREAM_RESULT_DUMP_THRESHOLD 100 #define MAX_STREAM_RESULT_DUMP_THRESHOLD 100
static int32_t updateCheckPointInfo(SStreamTask* pTask); static int32_t updateCheckPointInfo(SStreamTask* pTask);
...@@ -399,61 +397,6 @@ static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { ...@@ -399,61 +397,6 @@ static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
static int32_t extractMsgFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks,
const char* id) {
int32_t retryTimes = 0;
int32_t MAX_RETRY_TIMES = 5;
while (1) {
if (streamTaskShouldPause(&pTask->status)) {
qDebug("s-task:%s task should pause, input blocks:%d", pTask->id.idStr, *numOfBlocks);
return TSDB_CODE_SUCCESS;
}
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) {
taosMsleep(10);
qDebug("===stream===try again batchSize:%d, retry:%d", *numOfBlocks, retryTimes);
continue;
}
qDebug("===stream===break batchSize:%d", *numOfBlocks);
return TSDB_CODE_SUCCESS;
}
// do not merge blocks for sink node
if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
*numOfBlocks = 1;
*pInput = qItem;
return TSDB_CODE_SUCCESS;
}
if (*pInput == NULL) {
ASSERT((*numOfBlocks) == 0);
*pInput = qItem;
} else {
// todo we need to sort the data block, instead of just appending into the array list.
void* newRet = streamMergeQueueItem(*pInput, qItem);
if (newRet == NULL) {
qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks);
streamQueueProcessFail(pTask->inputQueue);
return TSDB_CODE_SUCCESS;
}
*pInput = newRet;
}
*numOfBlocks += 1;
streamQueueProcessSuccess(pTask->inputQueue);
if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) {
qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM);
return TSDB_CODE_SUCCESS;
}
}
}
/** /**
* todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the * todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the
* appropriate batch of blocks should be handled in 5 to 10 sec. * appropriate batch of blocks should be handled in 5 to 10 sec.
...@@ -468,21 +411,28 @@ int32_t streamExecForAll(SStreamTask* pTask) { ...@@ -468,21 +411,28 @@ int32_t streamExecForAll(SStreamTask* pTask) {
// merge multiple input data if possible in the input queue. // merge multiple input data if possible in the input queue.
qDebug("s-task:%s start to extract data block from inputQ", id); qDebug("s-task:%s start to extract data block from inputQ", id);
/*int32_t code = */extractMsgFromInputQ(pTask, &pInput, &batchSize, id); /*int32_t code = */extractBlocksFromInputQ(pTask, &pInput, &batchSize, id);
if (pInput == NULL) { if (pInput == NULL) {
ASSERT(batchSize == 0); ASSERT(batchSize == 0);
if (pTask->info.fillHistory && pTask->status.transferState) { if (pTask->info.fillHistory && pTask->status.transferState) {
int32_t code = streamTransferStateToStreamTask(pTask); int32_t code = streamTransferStateToStreamTask(pTask);
} }
break; // no data in the inputQ, return now
return 0;
} }
if (pTask->info.taskLevel == TASK_LEVEL__SINK) { if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK); if (pInput->type == STREAM_INPUT__DATA_BLOCK) {
qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize); qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize);
streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput); streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput);
continue; continue;
}
} else {
ASSERT(pInput->type == STREAM_INPUT__CHECKPOINT);
ASSERT(pTask->status.taskStatus == TASK_STATUS__CK);
pTask->status.taskStatus = TASK_STATUS__CK_READY;
return 0;
} }
int64_t st = taosGetTimestampMs(); int64_t st = taosGetTimestampMs();
...@@ -519,6 +469,9 @@ int32_t streamExecForAll(SStreamTask* pTask) { ...@@ -519,6 +469,9 @@ int32_t streamExecForAll(SStreamTask* pTask) {
} else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { } else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) {
const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput; const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput;
qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
} else if (pItem->type == STREAM_CHECKPOINT) {
const SStreamCheckpoint* pCheckpoint = (const SStreamCheckpoint*) pInput;
qSetMultiStreamInput(pExecutor, pCheckpoint->pBlock, 1, STREAM_INPUT__CHECKPOINT);
} else { } else {
ASSERT(0); ASSERT(0);
} }
...@@ -533,9 +486,14 @@ int32_t streamExecForAll(SStreamTask* pTask) { ...@@ -533,9 +486,14 @@ int32_t streamExecForAll(SStreamTask* pTask) {
id, el, resSize / 1048576.0, totalBlocks); id, el, resSize / 1048576.0, totalBlocks);
streamFreeQitem(pInput); streamFreeQitem(pInput);
}
return 0; // do nothing after sync executor state to storage backend, untill the vnode-level checkpoint is completed.
if (pInput->type == STREAM_INPUT__CHECKPOINT) {
ASSERT(pTask->status.taskStatus == TASK_STATUS__CK);
pTask->status.taskStatus = TASK_STATUS__CK_READY;
return 0;
}
}
} }
bool streamTaskIsIdle(const SStreamTask* pTask) { bool streamTaskIsIdle(const SStreamTask* pTask) {
...@@ -574,9 +532,27 @@ int32_t streamTryExec(SStreamTask* pTask) { ...@@ -574,9 +532,27 @@ int32_t streamTryExec(SStreamTask* pTask) {
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), qDebug("s-task:%s exec completed, status:%s, sched-status:%d", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus),
pTask->status.schedStatus); pTask->status.schedStatus);
if (!taosQueueEmpty(pTask->inputQueue->queue) && (!streamTaskShouldStop(&pTask->status)) && if (pTask->status.taskStatus == TASK_STATUS__CK_READY) {
(!streamTaskShouldPause(&pTask->status))) { // check for all tasks, and do generate the vnode-wide checkpoint data.
streamSchedExec(pTask); // todo extract method
SStreamMeta* pMeta = pTask->pMeta;
int32_t remain = atomic_sub_fetch_32(&pMeta->notCkptReadyTasks, 1);
if (remain <= 0) { // all tasks are in TASK_STATUS__CK_READY state
streamBackendDoCheckpoint(pMeta, pTask->checkpointingId);
}
// send check point response to upstream task
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
streamTaskSendCheckpointSourceRsp(pTask, pMeta->vgId);
} else {
streamTaskSendCheckpointRsp(pTask, pMeta->vgId);
}
} else {
if (!taosQueueEmpty(pTask->inputQueue->queue) && (!streamTaskShouldStop(&pTask->status)) &&
(!streamTaskShouldPause(&pTask->status))) {
streamSchedExec(pTask);
}
} }
} }
......
...@@ -107,8 +107,8 @@ SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue) { ...@@ -107,8 +107,8 @@ SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue) {
} }
#endif #endif
#define MAX_STREAM_EXEC_BATCH_NUM 128 #define MAX_STREAM_EXEC_BATCH_NUM 32
#define MIN_STREAM_EXEC_BATCH_NUM 16 #define MIN_STREAM_EXEC_BATCH_NUM 4
// todo refactor: // todo refactor:
// read data from input queue // read data from input queue
...@@ -119,6 +119,7 @@ typedef struct SQueueReader { ...@@ -119,6 +119,7 @@ typedef struct SQueueReader {
int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms
} SQueueReader; } SQueueReader;
#if 0
SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char* idstr) { SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char* idstr) {
int32_t numOfBlocks = 0; int32_t numOfBlocks = 0;
int32_t tryCount = 0; int32_t tryCount = 0;
...@@ -164,3 +165,58 @@ SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char* ...@@ -164,3 +165,58 @@ SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char*
return pRet; return pRet;
} }
#endif
int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, const char* id) {
int32_t retryTimes = 0;
int32_t MAX_RETRY_TIMES = 5;
while (1) {
if (streamTaskShouldPause(&pTask->status)) {
qDebug("s-task:%s task should pause, input blocks:%d", pTask->id.idStr, *numOfBlocks);
return TSDB_CODE_SUCCESS;
}
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) {
taosMsleep(10);
qDebug("===stream===try again batchSize:%d, retry:%d", *numOfBlocks, retryTimes);
continue;
}
qDebug("===stream===break batchSize:%d", *numOfBlocks);
return TSDB_CODE_SUCCESS;
}
// do not merge blocks for sink node and check point data block
if ((pTask->info.taskLevel == TASK_LEVEL__SINK) || (qItem->type == STREAM_INPUT__CHECKPOINT)) {
*numOfBlocks = 1;
*pInput = qItem;
return TSDB_CODE_SUCCESS;
}
if (*pInput == NULL) {
ASSERT((*numOfBlocks) == 0);
*pInput = qItem;
} else {
// todo we need to sort the data block, instead of just appending into the array list.
void* newRet = streamMergeQueueItem(*pInput, qItem);
if (newRet == NULL) {
qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks);
streamQueueProcessFail(pTask->inputQueue);
return TSDB_CODE_SUCCESS;
}
*pInput = newRet;
}
*numOfBlocks += 1;
streamQueueProcessSuccess(pTask->inputQueue);
if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) {
qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM);
return TSDB_CODE_SUCCESS;
}
}
}
...@@ -286,29 +286,6 @@ int32_t streamSourceScanHistoryData(SStreamTask* pTask) { ...@@ -286,29 +286,6 @@ int32_t streamSourceScanHistoryData(SStreamTask* pTask) {
return streamScanExec(pTask, 100); return streamScanExec(pTask, 100);
} }
int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) {
SStreamScanHistoryFinishReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId };
// serialize
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
req.taskId = pTask->fixedEpDispatcher.taskId;
streamDoDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgs = taosArrayGetSize(vgInfo);
qDebug("s-task:%s send scan-history-data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr,
numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus));
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.taskId = pVgInfo->taskId;
streamDoDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
}
return 0;
}
static int32_t doDispatchTransferMsg(SStreamTask* pTask, const SStreamTransferReq* pReq, int32_t vgId, SEpSet* pEpSet) { static int32_t doDispatchTransferMsg(SStreamTask* pTask, const SStreamTransferReq* pReq, int32_t vgId, SEpSet* pEpSet) {
void* buf = NULL; void* buf = NULL;
int32_t code = -1; int32_t code = -1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册