diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h
index 700a4d9daf0985e2608f365149be99ea3e24a877..367e656f064a200ace8a051a88a554808f7dae61 100644
--- a/source/client/inc/clientInt.h
+++ b/source/client/inc/clientInt.h
@@ -181,6 +181,7 @@ typedef struct SRequestSendRecvBody {
   tsem_t            rspSem;  // not used now
   __taos_async_fn_t queryFp;
   __taos_async_fn_t fetchFp;
+  EQueryExecMode    execMode;
   void*             param;
   SDataBuf          requestMsg;
   int64_t           queryJob;  // query job, created according to sql query DAG.
diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c
index 32c11983c2696a5f5b6542a999f7358f31db96c1..d846cb93af6a2c16fd21d2f954b8393e426aa27a 100644
--- a/source/client/src/clientImpl.c
+++ b/source/client/src/clientImpl.c
@@ -153,7 +153,7 @@ int32_t buildRequest(uint64_t connId, const char* sql, int sqlLen, void* param,
   *pRequest = createRequest(connId, TSDB_SQL_SELECT);
   if (*pRequest == NULL) {
     tscError("failed to malloc sqlObj, %s", sql);
-    return TSDB_CODE_TSC_OUT_OF_MEMORY;
+    return terrno;
   }
 
   (*pRequest)->sqlstr = taosMemoryMalloc(sqlLen + 1);
@@ -933,6 +933,8 @@ SRequestObj* launchQuery(uint64_t connId, const char* sql, int sqlLen, bool vali
 void launchAsyncQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaData* pResultMeta) {
   int32_t code = 0;
 
+  pRequest->body.execMode = pQuery->execMode;
+
   switch (pQuery->execMode) {
     case QUERY_EXEC_MODE_LOCAL:
       asyncExecLocalCmd(pRequest, pQuery);
@@ -1149,7 +1151,6 @@ STscObj* taosConnectImpl(const char* user, const char* auth, const char* db, __t
   SRequestObj* pRequest = createRequest(pTscObj->id, TDMT_MND_CONNECT);
   if (pRequest == NULL) {
     destroyTscObj(pTscObj);
-    terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
     return NULL;
   }
 
diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c
index 73def5b9b188b394a9ba3e0e3816a7e0944e4653..14a431feabab2934aed0a7eee46f85dbdae2009c 100644
--- a/source/client/src/clientMain.c
+++ b/source/client/src/clientMain.c
@@ -49,7 +49,7 @@ int taos_options(TSDB_OPTION option, const void *arg, ...) {
 }
 
 // this function may be called by user or system, or by both simultaneously.
 void taos_cleanup(void) {
-  tscInfo("start to cleanup client environment");
+  tscDebug("start to cleanup client environment");
   if (atomic_val_compare_exchange_32(&sentinel, TSC_VAR_NOT_RELEASE, TSC_VAR_RELEASED) != TSC_VAR_NOT_RELEASE) {
     return;
   }
@@ -58,7 +58,10 @@ void taos_cleanup(void) {
   clientReqRefPool = -1;
   taosCloseRef(id);
 
-  cleanupTaskQueue();
+  hbMgrCleanUp();
+
+  catalogDestroy();
+  schedulerDestroy();
 
   fmFuncMgtDestroy();
   qCleanupKeywordsTable();
@@ -67,12 +70,11 @@ void taos_cleanup(void) {
   clientConnRefPool = -1;
   taosCloseRef(id);
 
-  hbMgrCleanUp();
+  rpcCleanup();
+  tscDebug("rpc cleanup");
 
-  catalogDestroy();
-  schedulerDestroy();
+  cleanupTaskQueue();
 
-  rpcCleanup();
   tscInfo("all local resources released");
   taosCleanupCfg();
   taosCloseLog();
@@ -852,27 +854,24 @@ void taos_fetch_rows_a(TAOS_RES *res, __taos_async_fn_t fp, void *param) {
   }
 
   // all data has returned to App already, no need to try again
-  if (pResultInfo->completed && (pRequest->body.queryJob != 0)) {
-    pResultInfo->numOfRows = 0;
-    pRequest->body.fetchFp(param, pRequest, pResultInfo->numOfRows);
-    return;
-  }
-
-  // it is a local executed query, no need to do async fetch
-  if (pRequest->body.queryJob == 0) {
-    ASSERT(pResultInfo->completed && pResultInfo->numOfRows >= 0);
-    if (pResultInfo->localResultFetched) {
-      pResultInfo->numOfRows = 0;
-      pResultInfo->current = 0;
-      pRequest->body.fetchFp(param, pRequest, pResultInfo->numOfRows);
+  if (pResultInfo->completed) {
+    // it is a local executed query, no need to do async fetch
+    if (QUERY_EXEC_MODE_LOCAL == pRequest->body.execMode) {
+      ASSERT(pResultInfo->numOfRows >= 0);
+      if (pResultInfo->localResultFetched) {
+        pResultInfo->numOfRows = 0;
+        pResultInfo->current = 0;
+      } else {
+        pResultInfo->localResultFetched = true;
+      }
     } else {
-      pResultInfo->localResultFetched = true;
-      pRequest->body.fetchFp(param, pRequest, pResultInfo->numOfRows);
+      pResultInfo->numOfRows = 0;
     }
+
+    pRequest->body.fetchFp(param, pRequest, pResultInfo->numOfRows);
     return;
   }
-
 
   SSchedulerReq req = {
       .syncReq = false,
       .fetchFp = fetchCallback,
diff --git a/source/libs/catalog/src/catalog.c b/source/libs/catalog/src/catalog.c
index ca50702894a2a473099fc336c385d01588354173..1b7f53ae67c41ff8f757b8ab65d8f18cec549f84 100644
--- a/source/libs/catalog/src/catalog.c
+++ b/source/libs/catalog/src/catalog.c
@@ -1293,7 +1293,7 @@ void catalogDestroy(void) {
 
   if (!taosCheckCurrentInDll()) {
     ctgClearCacheEnqueue(NULL, true, true, true);
-    taosThreadJoin(gCtgMgmt.updateThread, NULL);
+    taosThreadJoin(gCtgMgmt.updateThread, NULL);
   }
 
   taosHashCleanup(gCtgMgmt.pCluster);
diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h
index 0a0d3190259693cc0f8205d5874a5fb33568a06a..b6b17cb1066e738e360bd7a8cd2d80c5b0d7def3 100644
--- a/source/libs/scheduler/inc/schInt.h
+++ b/source/libs/scheduler/inc/schInt.h
@@ -223,6 +223,7 @@ typedef struct SSchJobAttr {
 
 typedef struct {
   int32_t  op;
+  SRWLatch lock;
   bool     syncReq;
 } SSchOpStatus;
 
@@ -473,6 +474,7 @@ int32_t schGetTaskFromList(SHashObj *pTaskList, uint64_t taskId, SSchTask **pTas
 int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel, int32_t levelNum);
 int32_t schSwitchTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask);
 void    schDirectPostJobRes(SSchedulerReq* pReq, int32_t errCode);
+bool    schChkCurrentOp(SSchJob *pJob, int32_t op, bool sync);
 
 extern SSchDebug gSCHDebug;
 
diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c
index e482814ee7a71d12acb350ae99f3db9656892a02..19bb93249f50d22bb535e25654fc93bbab11a981 100644
--- a/source/libs/scheduler/src/schJob.c
+++ b/source/libs/scheduler/src/schJob.c
@@ -443,25 +443,37 @@ int32_t schNotifyUserFetchRes(SSchJob* pJob) {
 }
 
 void schPostJobRes(SSchJob *pJob, SCH_OP_TYPE op) {
+  SCH_LOCK(SCH_WRITE, &pJob->opStatus.lock);
+
   if (SCH_OP_NULL == pJob->opStatus.op) {
     SCH_JOB_DLOG("job not in any operation, no need to post job res, status:%s", jobTaskStatusStr(pJob->status));
-    return;
+    goto _return;
   }
 
   if (op && pJob->opStatus.op != op) {
     SCH_JOB_ELOG("job in operation %s mis-match with expected %s", schGetOpStr(pJob->opStatus.op), schGetOpStr(op));
-    return;
+    goto _return;
   }
 
   if (SCH_JOB_IN_SYNC_OP(pJob)) {
+    SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
     tsem_post(&pJob->rspSem);
   } else if (SCH_JOB_IN_ASYNC_EXEC_OP(pJob)) {
+    SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
     schNotifyUserExecRes(pJob);
   } else if (SCH_JOB_IN_ASYNC_FETCH_OP(pJob)) {
+    SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
     schNotifyUserFetchRes(pJob);
   } else {
+    SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
     SCH_JOB_ELOG("job not in any operation, status:%s", jobTaskStatusStr(pJob->status));
   }
+
+  return;
+
+_return:
+
+  SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
 }
 
 int32_t schProcessOnJobFailureImpl(SSchJob *pJob, int32_t status, int32_t errCode) {
@@ -658,13 +670,13 @@ int32_t schJobFetchRows(SSchJob *pJob) {
   if (!(pJob->attr.explainMode == EXPLAIN_MODE_STATIC)) {
     SCH_ERR_RET(schLaunchFetchTask(pJob));
 
-    if (pJob->opStatus.syncReq) {
+    if (schChkCurrentOp(pJob, SCH_OP_FETCH, true)) {
       SCH_JOB_DLOG("sync wait for rsp now, job status:%s", SCH_GET_JOB_STATUS_STR(pJob));
       tsem_wait(&pJob->rspSem);
       SCH_RET(schDumpJobFetchRes(pJob, pJob->userRes.fetchRes));
     }
   } else {
-    if (pJob->opStatus.syncReq) {
+    if (schChkCurrentOp(pJob, SCH_OP_FETCH, true)) {
      SCH_RET(schDumpJobFetchRes(pJob, pJob->userRes.fetchRes));
     } else {
      schPostJobRes(pJob, SCH_OP_FETCH);
@@ -775,25 +787,37 @@ void schDirectPostJobRes(SSchedulerReq* pReq, int32_t errCode) {
   }
 }
 
+bool schChkCurrentOp(SSchJob *pJob, int32_t op, bool sync) {
+  SCH_LOCK(SCH_READ, &pJob->opStatus.lock);
+  bool r = (pJob->opStatus.op == op) && (pJob->opStatus.syncReq == sync);
+  SCH_UNLOCK(SCH_READ, &pJob->opStatus.lock);
+
+  return r;
+}
+
 void schProcessOnOpEnd(SSchJob *pJob, SCH_OP_TYPE type, SSchedulerReq* pReq, int32_t errCode) {
   int32_t op = 0;
 
   switch (type) {
     case SCH_OP_EXEC:
       if (pReq && pReq->syncReq) {
+        SCH_LOCK(SCH_WRITE, &pJob->opStatus.lock);
         op = atomic_val_compare_exchange_32(&pJob->opStatus.op, type, SCH_OP_NULL);
         if (SCH_OP_NULL == op || op != type) {
           SCH_JOB_ELOG("job not in %s operation, op:%s, status:%s", schGetOpStr(type), schGetOpStr(op), jobTaskStatusStr(pJob->status));
         }
+        SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
         schDumpJobExecRes(pJob, pReq->pExecRes);
       }
       break;
     case SCH_OP_FETCH:
       if (pReq && pReq->syncReq) {
+        SCH_LOCK(SCH_WRITE, &pJob->opStatus.lock);
         op = atomic_val_compare_exchange_32(&pJob->opStatus.op, type, SCH_OP_NULL);
         if (SCH_OP_NULL == op || op != type) {
           SCH_JOB_ELOG("job not in %s operation, op:%s, status:%s", schGetOpStr(type), schGetOpStr(op), jobTaskStatusStr(pJob->status));
         }
+        SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
       }
       break;
     case SCH_OP_GET_STATUS:
@@ -816,8 +840,10 @@ int32_t schProcessOnOpBegin(SSchJob* pJob, SCH_OP_TYPE type, SSchedulerReq* pReq
 
   switch (type) {
     case SCH_OP_EXEC:
+      SCH_LOCK(SCH_WRITE, &pJob->opStatus.lock);
       if (SCH_OP_NULL != atomic_val_compare_exchange_32(&pJob->opStatus.op, SCH_OP_NULL, type)) {
         SCH_JOB_ELOG("job already in %s operation", schGetOpStr(pJob->opStatus.op));
+        SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
         schDirectPostJobRes(pReq, TSDB_CODE_TSC_APP_ERROR);
         SCH_ERR_RET(TSDB_CODE_TSC_APP_ERROR);
       }
@@ -825,10 +851,13 @@ int32_t schProcessOnOpBegin(SSchJob* pJob, SCH_OP_TYPE type, SSchedulerReq* pReq
       SCH_JOB_DLOG("job start %s operation", schGetOpStr(pJob->opStatus.op));
 
       pJob->opStatus.syncReq = pReq->syncReq;
+      SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
       break;
     case SCH_OP_FETCH:
+      SCH_LOCK(SCH_WRITE, &pJob->opStatus.lock);
       if (SCH_OP_NULL != atomic_val_compare_exchange_32(&pJob->opStatus.op, SCH_OP_NULL, type)) {
         SCH_JOB_ELOG("job already in %s operation", schGetOpStr(pJob->opStatus.op));
+        SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
         schDirectPostJobRes(pReq, TSDB_CODE_TSC_APP_ERROR);
         SCH_ERR_RET(TSDB_CODE_TSC_APP_ERROR);
       }
@@ -840,6 +869,7 @@ int32_t schProcessOnOpBegin(SSchJob* pJob, SCH_OP_TYPE type, SSchedulerReq* pReq
       pJob->userRes.cbParam = pReq->cbParam;
 
       pJob->opStatus.syncReq = pReq->syncReq;
+      SCH_UNLOCK(SCH_WRITE, &pJob->opStatus.lock);
 
       if (!SCH_JOB_NEED_FETCH(pJob)) {
         SCH_JOB_ELOG("no need to fetch data, status:%s", SCH_GET_JOB_STATUS_STR(pJob));
diff --git a/source/libs/scheduler/src/schTask.c b/source/libs/scheduler/src/schTask.c
index 71d123b7998d1c9759758fad33ff3ab411223d5a..236257666b23b93d174f8f1126497f061dfd90c9 100644
--- a/source/libs/scheduler/src/schTask.c
+++ b/source/libs/scheduler/src/schTask.c
@@ -505,6 +505,7 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo
     return TSDB_CODE_SUCCESS;
   }
 
+/*
   if (SCH_IS_DATA_BIND_TASK(pTask)) {
     if ((pTask->execId + 1) >= SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)) {
       *needRetry = false;
@@ -522,6 +523,7 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo
       return TSDB_CODE_SUCCESS;
     }
   }
+*/
 
   *needRetry = true;
   SCH_TASK_DLOG("task need the %dth retry, errCode:%x - %s", pTask->execId + 1, errCode, tstrerror(errCode));
diff --git a/source/util/src/tref.c b/source/util/src/tref.c
index 2e4c33bc8758f995c10e8efef6569e9f30cb67ee..9cd849b9be023f2db1d41266104d889fcb38768d 100644
--- a/source/util/src/tref.c
+++ b/source/util/src/tref.c
@@ -431,7 +431,7 @@ static int32_t taosDecRefCount(int32_t rsetId, int64_t rid, int32_t remove) {
       }
       released = 1;
     } else {
-      uTrace("rsetId:%d p:%p rid:%" PRId64 " is released", rsetId, pNode->p, rid);
+      uTrace("rsetId:%d p:%p rid:%" PRId64 " is released, remain count %d", rsetId, pNode->p, rid, pNode->count);
     }
   } else {
     uTrace("rsetId:%d rid:%" PRId64 " is not there, failed to release/remove", rsetId, rid);
diff --git a/source/util/src/tsched.c b/source/util/src/tsched.c
index 691a0d34d42ca3ab04be5daf61414016436a6bb1..9abce966f5f97fc1f5b050f3a9bd3e0d2e8a3519 100644
--- a/source/util/src/tsched.c
+++ b/source/util/src/tsched.c
@@ -129,7 +129,7 @@ void *taosProcessSchedQueue(void *scheduler) {
   while (1) {
     if ((ret = tsem_wait(&pSched->fullSem)) != 0) {
       uFatal("wait %s fullSem failed(%s)", pSched->label, strerror(errno));
-      exit(ret);
+      ASSERT(0);
     }
     if (pSched->stop) {
       break;
@@ -137,7 +137,7 @@ void *taosProcessSchedQueue(void *scheduler) {
 
     if ((ret = taosThreadMutexLock(&pSched->queueMutex)) != 0) {
       uFatal("lock %s queueMutex failed(%s)", pSched->label, strerror(errno));
-      exit(ret);
+      ASSERT(0);
     }
 
     msg = pSched->queue[pSched->fullSlot];
@@ -146,12 +146,12 @@ void *taosProcessSchedQueue(void *scheduler) {
 
     if ((ret = taosThreadMutexUnlock(&pSched->queueMutex)) != 0) {
       uFatal("unlock %s queueMutex failed(%s)", pSched->label, strerror(errno));
-      exit(ret);
+      ASSERT(0);
     }
 
     if ((ret = tsem_post(&pSched->emptySem)) != 0) {
       uFatal("post %s emptySem failed(%s)", pSched->label, strerror(errno));
-      exit(ret);
+      ASSERT(0);
     }
 
     if (msg.fp)
@@ -174,12 +174,12 @@ void taosScheduleTask(void *queueScheduler, SSchedMsg *pMsg) {
 
   if ((ret = tsem_wait(&pSched->emptySem)) != 0) {
     uFatal("wait %s emptySem failed(%s)", pSched->label, strerror(errno));
-    exit(ret);
+    ASSERT(0);
   }
 
   if ((ret = taosThreadMutexLock(&pSched->queueMutex)) != 0) {
     uFatal("lock %s queueMutex failed(%s)", pSched->label, strerror(errno));
-    exit(ret);
+    ASSERT(0);
   }
 
   pSched->queue[pSched->emptySlot] = *pMsg;
@@ -187,12 +187,12 @@ void taosScheduleTask(void *queueScheduler, SSchedMsg *pMsg) {
 
   if ((ret = taosThreadMutexUnlock(&pSched->queueMutex)) != 0) {
     uFatal("unlock %s queueMutex failed(%s)", pSched->label, strerror(errno));
-    exit(ret);
+    ASSERT(0);
   }
 
   if ((ret = tsem_post(&pSched->fullSem)) != 0) {
     uFatal("post %s fullSem failed(%s)", pSched->label, strerror(errno));
-    exit(ret);
+    ASSERT(0);
   }
 
 }
@@ -200,6 +200,8 @@ void taosCleanUpScheduler(void *param) {
   SSchedQueue *pSched = (SSchedQueue *)param;
   if (pSched == NULL) return;
 
+  uDebug("start to cleanup %s schedQueue", pSched->label);
+
   pSched->stop = true;
   for (int32_t i = 0; i < pSched->numOfThreads; ++i) {
     if (taosCheckPthreadValid(pSched->qthread[i])) {
diff --git a/tests/script/api/stopquery.c b/tests/script/api/stopquery.c
index 0f27fdf9f9184788b1fd3ad6f7a0bcad741c3785..082d987a22461725411c6528729d3de7f8e33c7c 100644
--- a/tests/script/api/stopquery.c
+++ b/tests/script/api/stopquery.c
@@ -36,7 +36,7 @@ int64_t st, et;
 char    hostName[128];
 char    dbName[128];
 char    tbName[128];
-int32_t runTimes = 10000;
+int32_t runTimes = 1000;
 
 typedef struct {
   int id;
@@ -85,9 +85,12 @@ static void sqExecSQLE(TAOS *taos, char *command) {
   taos_free_result(pSql);
 }
 
+void sqError(char* prefix, const char* errMsg) {
+  fprintf(stderr, "%s error: %s\n", prefix, errMsg);
+}
 
 void sqExit(char* prefix, const char* errMsg) {
-  fprintf(stderr, "%s error: %s\n", prefix, errMsg);
+  sqError(prefix, errMsg);
   exit(1);
 }
 
@@ -141,16 +144,20 @@ void sqCloseFetchCb(void *param, TAOS_RES *pRes, int numOfRows) {
   taos_close(qParam->taos);
   *qParam->end = 1;
+
+  taos_free_result(pRes);
 }
 
 void sqCloseQueryCb(void *param, TAOS_RES *pRes, int code) {
   SSP_CB_PARAM *qParam = (SSP_CB_PARAM *)param;
   if (code == 0 && pRes) {
     if (qParam->fetch) {
-      taos_fetch_rows_a(pRes, sqFreeFetchCb, param);
+      taos_fetch_rows_a(pRes, sqCloseFetchCb, param);
     } else {
       taos_close(qParam->taos);
       *qParam->end = 1;
+
+      taos_free_result(pRes);
     }
   } else {
     sqExit("select", taos_errstr(pRes));
   }
@@ -203,7 +210,9 @@ void sqAsyncQueryCb(void *param, TAOS_RES *pRes, int code) {
       *qParam->end = 1;
     }
   } else {
-    sqExit("select", taos_errstr(pRes));
+    sqError("select", taos_errstr(pRes));
+    *qParam->end = 1;
+    taos_free_result(pRes);
   }
 }
 
@@ -358,6 +367,7 @@ int sqCloseSyncQuery(bool fetch) {
     }
 
     taos_close(taos);
+    taos_free_result(pRes);
   }
   CASE_LEAVE();
 }
@@ -382,7 +392,7 @@ int sqCloseAsyncQuery(bool fetch) {
     SSP_CB_PARAM param = {0};
     param.fetch = fetch;
     param.end = &qEnd;
-    taos_query_a(taos, sql, sqFreeQueryCb, &param);
+    taos_query_a(taos, sql, sqCloseQueryCb, &param);
     while (0 == qEnd) {
       usleep(5000);
     }
@@ -457,8 +467,6 @@ void *closeThreadFp(void *arg) {
   }
 }
 
-
-
 void *killThreadFp(void *arg) {
   SSP_CB_PARAM* qParam = (SSP_CB_PARAM*)arg;
   while (true) {
@@ -471,6 +479,19 @@ void *killThreadFp(void *arg) {
   }
 }
 
+void *cleanupThreadFp(void *arg) {
+  SSP_CB_PARAM* qParam = (SSP_CB_PARAM*)arg;
+  while (true) {
+    if (qParam->taos) {
+      usleep(rand() % 10000);
+      taos_cleanup();
+      break;
+    }
+    usleep(1);
+  }
+}
+
+
 
 
 int sqConCloseSyncQuery(bool fetch) {
@@ -578,6 +599,8 @@ int sqConKillSyncQuery(bool fetch) {
 
     pthread_join(qid, NULL);
     pthread_join(cid, NULL);
+
+    taos_close(param.taos);
   }
   CASE_LEAVE();
 }
@@ -593,6 +616,40 @@ int sqConKillAsyncQuery(bool fetch) {
 
     pthread_join(qid, NULL);
    pthread_join(cid, NULL);
+
+    taos_close(param.taos);
+  }
+  CASE_LEAVE();
+}
+
+int sqConCleanupSyncQuery(bool fetch) {
+  CASE_ENTER();
+  pthread_t qid, cid;
+  for (int32_t i = 0; i < runTimes; ++i) {
+    SSP_CB_PARAM param = {0};
+    param.fetch = fetch;
+    pthread_create(&qid, NULL, syncQueryThreadFp, (void*)&param);
+    pthread_create(&cid, NULL, cleanupThreadFp, (void*)&param);
+
+    pthread_join(qid, NULL);
+    pthread_join(cid, NULL);
+    break;
+  }
+  CASE_LEAVE();
+}
+
+int sqConCleanupAsyncQuery(bool fetch) {
+  CASE_ENTER();
+  pthread_t qid, cid;
+  for (int32_t i = 0; i < runTimes; ++i) {
+    SSP_CB_PARAM param = {0};
+    param.fetch = fetch;
+    pthread_create(&qid, NULL, asyncQueryThreadFp, (void*)&param);
+    pthread_create(&cid, NULL, cleanupThreadFp, (void*)&param);
+
+    pthread_join(qid, NULL);
+    pthread_join(cid, NULL);
+    break;
   }
   CASE_LEAVE();
 }
@@ -600,7 +657,7 @@ int sqConKillAsyncQuery(bool fetch) {
 
 
 void sqRunAllCase(void) {
-/*
+#if 1
   sqStopSyncQuery(false);
   sqStopSyncQuery(true);
   sqStopAsyncQuery(false);
@@ -620,23 +677,26 @@ void sqRunAllCase(void) {
   sqConCloseSyncQuery(true);
   sqConCloseAsyncQuery(false);
   sqConCloseAsyncQuery(true);
-*/
 
-#if 0
   sqKillSyncQuery(false);
   sqKillSyncQuery(true);
   sqKillAsyncQuery(false);
   sqKillAsyncQuery(true);
-#endif
 
-  //sqConKillSyncQuery(false);
+  sqConKillSyncQuery(false);
   sqConKillSyncQuery(true);
 
-#if 0
   sqConKillAsyncQuery(false);
   sqConKillAsyncQuery(true);
 #endif
 
+  /*
+  sqConCleanupSyncQuery(false);
+  sqConCleanupSyncQuery(true);
+  sqConCleanupAsyncQuery(false);
+  sqConCleanupAsyncQuery(true);
+  */
+
   int32_t l = 5;
   while (l) {
     printf("%d\n", l--);
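
Reviewer note: the scheduler hunks above serialize access to SSchOpStatus through the new SRWLatch field — schChkCurrentOp() snapshots op and syncReq under a read latch, while schProcessOnOpBegin/End and schPostJobRes mutate them under the write latch, so schJobFetchRows() can no longer observe a half-updated operation state while taos_cleanup() tears the job down. Below is a minimal standalone sketch of that pattern, using a plain pthread rwlock in place of TDengine's SRWLatch; the names are illustrative, not the scheduler's actual API.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

// Stand-in for SSchOpStatus; the real struct gained an SRWLatch member.
// Initialize lock with pthread_rwlock_init() (or PTHREAD_RWLOCK_INITIALIZER).
typedef struct {
  int32_t          op;       // current operation, 0 == none (cf. SCH_OP_NULL)
  bool             syncReq;  // whether the current operation is synchronous
  pthread_rwlock_t lock;     // plays the role of the new SRWLatch
} OpStatus;

// Read-side snapshot, mirroring schChkCurrentOp(): both fields are read
// inside one read-locked section, so they always describe the same operation.
static bool opIsCurrent(OpStatus *s, int32_t op, bool sync) {
  pthread_rwlock_rdlock(&s->lock);
  bool hit = (s->op == op) && (s->syncReq == sync);
  pthread_rwlock_unlock(&s->lock);
  return hit;
}

// Write-side begin, mirroring schProcessOnOpBegin(): claim the op and record
// syncReq in the same critical section, or fail if an operation is in flight.
static bool opBegin(OpStatus *s, int32_t op, bool sync) {
  pthread_rwlock_wrlock(&s->lock);
  if (s->op != 0) {
    pthread_rwlock_unlock(&s->lock);
    return false;  // another operation already running
  }
  s->op = op;
  s->syncReq = sync;
  pthread_rwlock_unlock(&s->lock);
  return true;
}

// Write-side end, mirroring schProcessOnOpEnd(): clear the op under the lock.
static void opEnd(OpStatus *s, int32_t op) {
  pthread_rwlock_wrlock(&s->lock);
  if (s->op == op) s->op = 0;
  pthread_rwlock_unlock(&s->lock);
}

Before the patch, schJobFetchRows() read pJob->opStatus.syncReq on its own, so a concurrent schProcessOnOpEnd() could clear the op between that check and tsem_wait(); reading op and syncReq together under the latch is what makes the schChkCurrentOp(pJob, SCH_OP_FETCH, true) calls in the hunks above race-free.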