diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index bff65d352755043292b8056f20ae9e27b0c27a8d..6fb3d980274b835af8636a635a8612fb36c3ad4f 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -325,11 +325,13 @@ int32_t updateQnodeList(SAppInstInfo* pInfo, SArray* pNodeList) { if (pInfo->pQnodeList) { taosArrayDestroy(pInfo->pQnodeList); pInfo->pQnodeList = NULL; + tscDebug("QnodeList cleared in cluster 0x%" PRIx64, pInfo->clusterId); } if (pNodeList) { pInfo->pQnodeList = taosArrayDup(pNodeList); taosArraySort(pInfo->pQnodeList, compareQueryNodeLoad); + tscDebug("QnodeList updated in cluster 0x%" PRIx64 ", num:%d", pInfo->clusterId, taosArrayGetSize(pInfo->pQnodeList)); } taosThreadMutexUnlock(&pInfo->qnodeMutex); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 192a41a70ed5563c672e91db2ee1322907f99a17..6c202c6202c1ae4e8156864d37b129da24b6d56a 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -381,9 +381,11 @@ static int32_t tDeserializeSClientHbRsp(SDecoder *pDecoder, SClientHbRsp *pRsp) if (pQnodeNum > 0) { pRsp->query->pQnodeList = taosArrayInit(pQnodeNum, sizeof(SQueryNodeLoad)); if (NULL == pRsp->query->pQnodeList) return -1; - SQueryNodeLoad load = {0}; - if (tDecodeSQueryNodeLoad(pDecoder, &load) < 0) return -1; - taosArrayPush(pRsp->query->pQnodeList, &load); + for (int32_t i = 0; i < pQnodeNum; ++i) { + SQueryNodeLoad load = {0}; + if (tDecodeSQueryNodeLoad(pDecoder, &load) < 0) return -1; + taosArrayPush(pRsp->query->pQnodeList, &load); + } } } diff --git a/source/dnode/qnode/src/qnode.c b/source/dnode/qnode/src/qnode.c index 03d14d7ab4cabee1cae619d167759c0f32485171..b65189153ea4f0aa36680586e472eac4007a457f 100644 --- a/source/dnode/qnode/src/qnode.c +++ b/source/dnode/qnode/src/qnode.c @@ -64,16 +64,11 @@ int32_t qndGetLoad(SQnode *pQnode, SQnodeLoad *pLoad) { return 0; } -int32_t qnodeCrash = 1; int32_t qndPreprocessQueryMsg(SQnode *pQnode, SRpcMsg * pMsg) { if (TDMT_SCH_QUERY != pMsg->msgType && TDMT_SCH_MERGE_QUERY != pMsg->msgType) { return 0; } - if (qnodeCrash) { - ASSERT(0); - } - return qWorkerPreprocessQueryMsg(pQnode->pQuery, pMsg); } diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index 591301ef44bfa423223286ade30496af4de3104c..052fdefa61e1e59412b2d755a3ec87831714c041 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -35,7 +35,6 @@ extern "C" { #define SCH_DEFAULT_TASK_TIMEOUT_USEC 10000000 #define SCH_MAX_TASK_TIMEOUT_USEC 60000000 -#define SCH_TASK_MAX_EXEC_TIMES 8 #define SCH_MAX_CANDIDATE_EP_NUM TSDB_MAX_REPLICA enum { @@ -179,10 +178,10 @@ typedef struct SSchLevel { } SSchLevel; typedef struct SSchTaskProfile { - int64_t startTs; - int64_t execUseTime[SCH_TASK_MAX_EXEC_TIMES]; - int64_t waitTime; - int64_t endTs; + int64_t startTs; + int64_t* execTime; + int64_t waitTime; + int64_t endTs; } SSchTaskProfile; typedef struct SSchTask { @@ -260,33 +259,7 @@ typedef struct SSchJob { extern SSchedulerMgmt schMgmt; -#define SCH_LOG_TASK_START_TS(_task) \ - do { \ - int64_t us = taosGetTimestampUs(); \ - int32_t idx = (_task)->execId % SCH_TASK_MAX_EXEC_TIMES; \ - (_task)->profile.execUseTime[idx] = us; \ - if (0 == (_task)->execId) { \ - (_task)->profile.startTs = us; \ - } \ - } while (0) - -#define SCH_LOG_TASK_WAIT_TS(_task) \ - do { \ - int64_t us = taosGetTimestampUs(); \ - int32_t idx = (_task)->execId % SCH_TASK_MAX_EXEC_TIMES; \ - (_task)->profile.waitTime += us - (_task)->profile.execUseTime[idx]; \ - } while (0) - - -#define SCH_LOG_TASK_END_TS(_task) \ - do { \ - int64_t us = taosGetTimestampUs(); \ - int32_t idx = (_task)->execId % SCH_TASK_MAX_EXEC_TIMES; \ - (_task)->profile.execUseTime[idx] = us - (_task)->profile.execUseTime[idx]; \ - (_task)->profile.endTs = us; \ - } while (0) - -#define SCH_TASK_TIMEOUT(_task) ((taosGetTimestampUs() - (_task)->profile.execUseTime[(_task)->execId % SCH_TASK_MAX_EXEC_TIMES]) > (_task)->timeoutUsec) +#define SCH_TASK_TIMEOUT(_task) ((taosGetTimestampUs() - (_task)->profile.execTime[(_task)->execId % (_task)->maxExecTimes]) > (_task)->timeoutUsec) #define SCH_TASK_READY_FOR_LAUNCH(readyNum, task) ((readyNum) >= taosArrayGetSize((task)->children)) @@ -320,6 +293,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_TASK_NEED_FLOW_CTRL(_job, _task) (SCH_IS_DATA_BIND_QRY_TASK(_task) && SCH_JOB_NEED_FLOW_CTRL(_job) && SCH_IS_LEVEL_UNFINISHED((_task)->level)) #define SCH_FETCH_TYPE(_pSrcTask) (SCH_IS_DATA_BIND_QRY_TASK(_pSrcTask) ? TDMT_SCH_FETCH : TDMT_SCH_MERGE_FETCH) #define SCH_TASK_NEED_FETCH(_task) ((_task)->plan->subplanType != SUBPLAN_TYPE_MODIFY) +#define SCH_TASK_MAX_EXEC_TIMES(_levelIdx, _levelNum) (SCH_MAX_CANDIDATE_EP_NUM * ((_levelNum) - (_levelIdx))) #define SCH_SET_JOB_TYPE(_job, type) do { if ((type) != SUBPLAN_TYPE_MODIFY) { (_job)->attr.queryJob = true; } } while (0) #define SCH_IS_QUERY_JOB(_job) ((_job)->attr.queryJob) @@ -328,7 +302,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_JOB_NEED_DROP(_job) (SCH_IS_QUERY_JOB(_job)) #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) #define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL) -#define SCH_MERGE_TASK_NETWORK_ERR(_task, _code, _len) (SCH_NETWORK_ERR(_code) && ((_len) > 0 || SCH_IS_DATA_BIND_TASK(_task))) +#define SCH_MERGE_TASK_NETWORK_ERR(_task, _code, _len) (SCH_NETWORK_ERR(_code) && (((_len) > 0) || (!SCH_IS_DATA_BIND_TASK(_task)))) #define SCH_REDIRECT_MSGTYPE(_msgType) ((_msgType) == TDMT_SCH_QUERY || (_msgType) == TDMT_SCH_MERGE_QUERY || (_msgType) == TDMT_SCH_FETCH || (_msgType) == TDMT_SCH_MERGE_FETCH) #define SCH_TASK_NEED_REDIRECT(_task, _msgType, _code, _rspLen) (SCH_REDIRECT_MSGTYPE(_msgType) && (NEED_SCHEDULER_REDIRECT_ERROR(_code) || SCH_MERGE_TASK_NETWORK_ERR((_task), (_code), (_rspLen)))) #define SCH_NEED_RETRY(_msgType, _code) ((SCH_NETWORK_ERR(_code) && SCH_REDIRECT_MSGTYPE(_msgType)) || (_code) == TSDB_CODE_SCH_TIMEOUT_ERROR) @@ -338,6 +312,33 @@ extern SSchedulerMgmt schMgmt; #define SCH_SWITCH_EPSET(_addr) ((_addr)->epSet.inUse = ((_addr)->epSet.inUse + 1) % (_addr)->epSet.numOfEps) #define SCH_TASK_NUM_OF_EPS(_addr) ((_addr)->epSet.numOfEps) +#define SCH_LOG_TASK_START_TS(_task) \ + do { \ + int64_t us = taosGetTimestampUs(); \ + int32_t idx = (_task)->execId % (_task)->maxExecTimes; \ + (_task)->profile.execTime[idx] = us; \ + if (0 == (_task)->execId) { \ + (_task)->profile.startTs = us; \ + } \ + } while (0) + +#define SCH_LOG_TASK_WAIT_TS(_task) \ + do { \ + int64_t us = taosGetTimestampUs(); \ + int32_t idx = (_task)->execId % (_task)->maxExecTimes; \ + (_task)->profile.waitTime += us - (_task)->profile.execTime[idx]; \ + } while (0) + + +#define SCH_LOG_TASK_END_TS(_task) \ + do { \ + int64_t us = taosGetTimestampUs(); \ + int32_t idx = (_task)->execId % (_task)->maxExecTimes; \ + (_task)->profile.execTime[idx] = us - (_task)->profile.execTime[idx]; \ + (_task)->profile.endTs = us; \ + } while (0) + + #define SCH_JOB_ELOG(param, ...) qError("QID:0x%" PRIx64 " " param, pJob->queryId, __VA_ARGS__) #define SCH_JOB_DLOG(param, ...) qDebug("QID:0x%" PRIx64 " " param, pJob->queryId, __VA_ARGS__) @@ -431,7 +432,8 @@ void schFreeTask(SSchJob *pJob, SSchTask *pTask); void schDropTaskInHashList(SSchJob *pJob, SHashObj *list); int32_t schLaunchLevelTasks(SSchJob *pJob, SSchLevel *level); int32_t schGetTaskFromList(SHashObj *pTaskList, uint64_t taskId, SSchTask **pTask); -int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel); +int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel, int32_t levelNum); +int32_t schSwitchTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask); #ifdef __cplusplus diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index 13100afb8bcc89add22402b8b33e3cc490be19e3..d2f9624eee4d7d4636990daec5d44fd860720513 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -337,7 +337,7 @@ int32_t schValidateAndBuildJob(SQueryPlan *pDag, SSchJob *pJob) { SCH_SET_JOB_TYPE(pJob, plan->subplanType); SSchTask task = {0}; - SCH_ERR_JRET(schInitTask(pJob, &task, plan, pLevel)); + SCH_ERR_JRET(schInitTask(pJob, &task, plan, pLevel, levelNum)); SSchTask *pTask = taosArrayPush(pLevel->subTasks, &task); if (NULL == pTask) { diff --git a/source/libs/scheduler/src/schTask.c b/source/libs/scheduler/src/schTask.c index d56397283c3ab7a3ec76f42f771332272e6763bd..e1e4ed87699a1151cc8bad9d19e23c19af84e2d3 100644 --- a/source/libs/scheduler/src/schTask.c +++ b/source/libs/scheduler/src/schTask.c @@ -46,21 +46,31 @@ void schFreeTask(SSchJob *pJob, SSchTask *pTask) { } -int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel) { +int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel, int32_t levelNum) { + int32_t code = 0; + pTask->plan = pPlan; pTask->level = pLevel; pTask->execId = -1; - pTask->maxExecTimes = SCH_TASK_MAX_EXEC_TIMES; + pTask->maxExecTimes = SCH_TASK_MAX_EXEC_TIMES(pLevel->level, levelNum); pTask->timeoutUsec = SCH_DEFAULT_TASK_TIMEOUT_USEC; - SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_INIT); pTask->taskId = schGenTaskId(); pTask->execNodes = taosHashInit(SCH_MAX_CANDIDATE_EP_NUM, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK); - if (NULL == pTask->execNodes) { - SCH_TASK_ELOG("taosHashInit %d execNodes failed", SCH_MAX_CANDIDATE_EP_NUM); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + pTask->profile.execTime = taosMemoryCalloc(pTask->maxExecTimes, sizeof(int64_t)); + if (NULL == pTask->execNodes || NULL == pTask->profile.execTime) { + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_INIT); + return TSDB_CODE_SUCCESS; + +_return: + + taosMemoryFreeClear(pTask->profile.execTime); + taosHashCleanup(pTask->execNodes); + + SCH_RET(code); } int32_t schRecordTaskSucceedNode(SSchJob *pJob, SSchTask *pTask) { @@ -338,6 +348,11 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf* pData, int32 qClearSubplanExecutionNode(pTask->plan); + // Note: current error task and upper level merge task + if ((pData && 0 == pData->len) || NULL == pData) { + SCH_ERR_JRET(schSwitchTaskCandidateAddr(pJob, pTask)); + } + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_INIT); int32_t childrenNum = taosArrayGetSize(pTask->children); @@ -531,10 +546,7 @@ int32_t schHandleTaskRetry(SSchJob *pJob, SSchTask *pTask) { if (SCH_IS_DATA_BIND_TASK(pTask)) { SCH_SWITCH_EPSET(&pTask->plan->execNode); } else { - int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs); - if (++pTask->candidateIdx >= candidateNum) { - pTask->candidateIdx = 0; - } + SCH_ERR_RET(schSwitchTaskCandidateAddr(pJob, pTask)); } SCH_ERR_RET(schLaunchTask(pJob, pTask)); @@ -633,6 +645,16 @@ int32_t schUpdateTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask, SEpSet* pEpSe return TSDB_CODE_SUCCESS; } +int32_t schSwitchTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask) { + int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs); + if (++pTask->candidateIdx >= candidateNum) { + pTask->candidateIdx = 0; + } + SCH_TASK_DLOG("switch task candiateIdx to %d", pTask->candidateIdx); + return TSDB_CODE_SUCCESS; +} + + int32_t schRemoveTaskFromExecList(SSchJob *pJob, SSchTask *pTask) { int32_t code = taosHashRemove(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId));