From 44b170427d5705915bb95cb83e171a22724cbb1d Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 30 Dec 2020 14:43:50 +0800 Subject: [PATCH] TD-2616 --- src/client/src/tscUtil.c | 2 +- src/os/inc/osSemphone.h | 11 +++++---- src/os/src/detail/osSemphone.c | 3 ++- src/os/src/windows/wSemphone.c | 8 ++++--- src/query/src/qExecutor.c | 4 ++-- src/rpc/src/rpcCache.c | 4 ++-- src/rpc/src/rpcMain.c | 4 ++-- src/sync/src/syncMain.c | 36 +++++++++++++--------------- src/sync/src/syncRestore.c | 11 +++++++-- src/sync/src/syncRetrieve.c | 15 +++++++++--- src/util/src/tlog.c | 4 ++-- src/util/src/tnote.c | 2 +- src/util/src/tref.c | 4 ++-- src/util/src/ttimer.c | 8 +++---- tests/script/unique/mnode/mgmt20.sim | 18 ++++++++++++++ 15 files changed, 84 insertions(+), 50 deletions(-) diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 172887c110..52e26fe95a 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -2503,7 +2503,7 @@ bool tscSetSqlOwner(SSqlObj* pSql) { SSqlRes* pRes = &pSql->res; // set the sql object owner - uint64_t threadId = taosGetPthreadId(); + uint64_t threadId = taosGetSelfPthreadId(); if (atomic_val_compare_exchange_64(&pSql->owner, 0, threadId) != 0) { pRes->code = TSDB_CODE_QRY_IN_EXEC; return false; diff --git a/src/os/inc/osSemphone.h b/src/os/inc/osSemphone.h index a71e74e97f..74e1bd4878 100644 --- a/src/os/inc/osSemphone.h +++ b/src/os/inc/osSemphone.h @@ -29,12 +29,13 @@ extern "C" { #endif // TAOS_OS_FUNC_SEMPHONE_PTHREAD -bool taosCheckPthreadValid(pthread_t thread); -int64_t taosGetPthreadId(); -void taosResetPthread(pthread_t *thread); -bool taosComparePthread(pthread_t first, pthread_t second); +bool taosCheckPthreadValid(pthread_t thread); +int64_t taosGetSelfPthreadId(); +int64_t taosGetPthreadId(pthread_t thread); +void taosResetPthread(pthread_t* thread); +bool taosComparePthread(pthread_t first, pthread_t second); int32_t taosGetPId(); -int32_t taosGetCurrentAPPName(char *name, int32_t* len); +int32_t taosGetCurrentAPPName(char* name, int32_t* len); #ifdef __cplusplus } diff --git a/src/os/src/detail/osSemphone.c b/src/os/src/detail/osSemphone.c index 9eb8c18a40..d379e56ed8 100644 --- a/src/os/src/detail/osSemphone.c +++ b/src/os/src/detail/osSemphone.c @@ -31,7 +31,8 @@ int tsem_wait(tsem_t* sem) { #ifndef TAOS_OS_FUNC_SEMPHONE_PTHREAD bool taosCheckPthreadValid(pthread_t thread) { return thread != 0; } -int64_t taosGetPthreadId() { return (int64_t)pthread_self(); } +int64_t taosGetSelfPthreadId() { return (int64_t)pthread_self(); } +int64_t taosGetPthreadId(pthread_t thread) { return (int64_t)thread; } void taosResetPthread(pthread_t *thread) { *thread = 0; } bool taosComparePthread(pthread_t first, pthread_t second) { return first == second; } int32_t taosGetPId() { return getpid(); } diff --git a/src/os/src/windows/wSemphone.c b/src/os/src/windows/wSemphone.c index 1f723540f6..0bc760b35e 100644 --- a/src/os/src/windows/wSemphone.c +++ b/src/os/src/windows/wSemphone.c @@ -25,14 +25,16 @@ bool taosCheckPthreadValid(pthread_t thread) { return thread.p != NULL; } void taosResetPthread(pthread_t *thread) { thread->p = 0; } -int64_t taosGetPthreadId() { +int64_t taosGetPthreadId(pthread_t thread) { #ifdef PTW32_VERSION - return pthread_getw32threadid_np(pthread_self()); + return pthread_getw32threadid_np(thread); #else - return (int64_t)pthread_self(); + return (int64_t)thread; #endif } +int64_t taosGetSelfPthreadId() { return taosGetPthreadId(pthread_self()); } + bool taosComparePthread(pthread_t first, pthread_t second) { return first.p == second.p; } diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 7fb366160c..d9630edb9e 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -7250,7 +7250,7 @@ static bool doBuildResCheck(SQInfo* pQInfo) { // clear qhandle owner, it must be in the secure area. other thread may run ahead before current, after it is // put into task to be executed. - assert(pQInfo->owner == taosGetPthreadId()); + assert(pQInfo->owner == taosGetSelfPthreadId()); pQInfo->owner = 0; pthread_mutex_unlock(&pQInfo->lock); @@ -7263,7 +7263,7 @@ static bool doBuildResCheck(SQInfo* pQInfo) { bool qTableQuery(qinfo_t qinfo) { SQInfo *pQInfo = (SQInfo *)qinfo; assert(pQInfo && pQInfo->signature == pQInfo); - int64_t threadId = taosGetPthreadId(); + int64_t threadId = taosGetSelfPthreadId(); int64_t curOwner = 0; if ((curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId)) != 0) { diff --git a/src/rpc/src/rpcCache.c b/src/rpc/src/rpcCache.c index 09d8f3bff1..60a12c26b7 100644 --- a/src/rpc/src/rpcCache.c +++ b/src/rpc/src/rpcCache.c @@ -272,7 +272,7 @@ static int rpcHashConn(void *handle, char *fqdn, uint16_t port, int8_t connType) } static void rpcLockCache(int64_t *lockedBy) { - int64_t tid = taosGetPthreadId(); + int64_t tid = taosGetSelfPthreadId(); int i = 0; while (atomic_val_compare_exchange_64(lockedBy, 0, tid) != 0) { if (++i % 100 == 0) { @@ -282,7 +282,7 @@ static void rpcLockCache(int64_t *lockedBy) { } static void rpcUnlockCache(int64_t *lockedBy) { - int64_t tid = taosGetPthreadId(); + int64_t tid = taosGetSelfPthreadId(); if (atomic_val_compare_exchange_64(lockedBy, tid, 0) != tid) { assert(false); } diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index ec1ee75dbb..13d6ed8ed5 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -1604,7 +1604,7 @@ static int rpcCheckAuthentication(SRpcConn *pConn, char *msg, int msgLen) { } static void rpcLockConn(SRpcConn *pConn) { - int64_t tid = taosGetPthreadId(); + int64_t tid = taosGetSelfPthreadId(); int i = 0; while (atomic_val_compare_exchange_64(&(pConn->lockedBy), 0, tid) != 0) { if (++i % 1000 == 0) { @@ -1614,7 +1614,7 @@ static void rpcLockConn(SRpcConn *pConn) { } static void rpcUnlockConn(SRpcConn *pConn) { - int64_t tid = taosGetPthreadId(); + int64_t tid = taosGetSelfPthreadId(); if (atomic_val_compare_exchange_64(&(pConn->lockedBy), tid, 0) != tid) { assert(false); } diff --git a/src/sync/src/syncMain.c b/src/sync/src/syncMain.c index 380e44780f..436f4de098 100644 --- a/src/sync/src/syncMain.c +++ b/src/sync/src/syncMain.c @@ -478,9 +478,7 @@ static void syncAddArbitrator(SSyncNode *pNode) { static void syncFreeNode(void *param) { SSyncNode *pNode = param; - - int32_t refCount = atomic_sub_fetch_32(&pNode->refCount, 1); - sDebug("vgId:%d, syncnode is freed, refCount:%d", pNode->vgId, refCount); + sDebug("vgId:%d, node is freed, refCount:%d", pNode->vgId, pNode->refCount); pthread_mutex_destroy(&pNode->mutex); tfree(pNode->pRecv); @@ -491,10 +489,10 @@ static void syncFreeNode(void *param) { SSyncNode *syncAcquireNode(int64_t rid) { SSyncNode *pNode = taosAcquireRef(tsNodeRefId, rid); if (pNode == NULL) { - sDebug("failed to acquire syncnode from refId:%" PRId64, rid); + sDebug("failed to acquire node from refId:%" PRId64, rid); } else { int32_t refCount = atomic_add_fetch_32(&pNode->refCount, 1); - sTrace("vgId:%d, acquire syncnode refId:%" PRId64 ", refCount:%d", pNode->vgId, rid, refCount); + sTrace("vgId:%d, acquire node refId:%" PRId64 ", refCount:%d", pNode->vgId, rid, refCount); } return pNode; @@ -502,16 +500,14 @@ SSyncNode *syncAcquireNode(int64_t rid) { void syncReleaseNode(SSyncNode *pNode) { int32_t refCount = atomic_sub_fetch_32(&pNode->refCount, 1); - sTrace("vgId:%d, dec syncnode refId:%" PRId64 " refCount:%d", pNode->vgId, pNode->rid, refCount); + sTrace("vgId:%d, release node refId:%" PRId64 ", refCount:%d", pNode->vgId, pNode->rid, refCount); taosReleaseRef(tsNodeRefId, pNode->rid); } static void syncFreePeer(void *param) { SSyncPeer *pPeer = param; - - int32_t refCount = atomic_sub_fetch_32(&pPeer->refCount, 1); - sDebug("%s, peer is freed, refCount:%d", pPeer->id, refCount); + sDebug("%s, peer is freed, refCount:%d", pPeer->id, pPeer->refCount); syncReleaseNode(pPeer->pSyncNode); tfree(pPeer); @@ -531,7 +527,7 @@ SSyncPeer *syncAcquirePeer(int64_t rid) { void syncReleasePeer(SSyncPeer *pPeer) { int32_t refCount = atomic_sub_fetch_32(&pPeer->refCount, 1); - sTrace("%s, dec peer refId:%" PRId64 ", refCount:%d", pPeer->id, pPeer->rid, refCount); + sTrace("%s, release peer refId:%" PRId64 ", refCount:%d", pPeer->id, pPeer->rid, refCount); taosReleaseRef(tsPeerRefId, pPeer->rid); } @@ -879,14 +875,14 @@ static void syncProcessSyncRequest(char *msg, SSyncPeer *pPeer) { int32_t ret = pthread_create(&thread, &thattr, syncRetrieveData, (void *)pPeer->rid); pthread_attr_destroy(&thattr); - if (ret != 0) { - sError("%s, failed to create sync thread since %s", pPeer->id, strerror(errno)); + if (ret < 0) { + sError("%s, failed to create sync retrieve thread since %s", pPeer->id, strerror(errno)); + syncReleasePeer(pPeer); } else { pPeer->sstatus = TAOS_SYNC_STATUS_START; - sDebug("%s, thread is created to retrieve data, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]); + sDebug("%s, sync retrieve thread:0x%08" PRIx64 " create successfully, rid:%" PRId64 ", set sstatus:%s", pPeer->id, + taosGetPthreadId(thread), pPeer->rid, syncStatus[pPeer->sstatus]); } - - syncReleasePeer(pPeer); } static void syncNotStarted(void *param, void *tmrId) { @@ -1154,19 +1150,19 @@ static void syncCreateRestoreDataThread(SSyncPeer *pPeer) { (void)syncAcquirePeer(pPeer->rid); - int32_t ret = pthread_create(&(thread), &thattr, (void *)syncRestoreData, (void *)pPeer->rid); + int32_t ret = pthread_create(&thread, &thattr, (void *)syncRestoreData, (void *)pPeer->rid); pthread_attr_destroy(&thattr); if (ret < 0) { SSyncNode *pNode = pPeer->pSyncNode; nodeSStatus = TAOS_SYNC_STATUS_INIT; - sError("%s, failed to create sync thread, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]); + sError("%s, failed to create sync restore thread, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]); taosClose(pPeer->syncFd); + syncReleasePeer(pPeer); } else { - sInfo("%s, sync connection is up", pPeer->id); + sInfo("%s, sync restore thread:0x%08" PRIx64 " create successfully, rid:%" PRId64, pPeer->id, + taosGetPthreadId(thread), pPeer->rid); } - - syncReleasePeer(pPeer); } static void syncProcessIncommingConnection(int32_t connFd, uint32_t sourceIp) { diff --git a/src/sync/src/syncRestore.c b/src/sync/src/syncRestore.c index e815594883..a5e268cdd2 100644 --- a/src/sync/src/syncRestore.c +++ b/src/sync/src/syncRestore.c @@ -353,12 +353,16 @@ static int32_t syncRestoreDataStepByStep(SSyncPeer *pPeer) { void *syncRestoreData(void *param) { int64_t rid = (int64_t)param; SSyncPeer *pPeer = syncAcquirePeer(rid); - if (pPeer == NULL) return NULL; + if (pPeer == NULL) { + sError("failed to restore data, invalid peer rid:%" PRId64, rid); + return NULL; + } SSyncNode *pNode = pPeer->pSyncNode; taosBlockSIGPIPE(); __sync_fetch_and_add(&tsSyncNum, 1); + sInfo("%s, start to restore data, sstatus:%s", pPeer->id, syncStatus[nodeSStatus]); (*pNode->notifyRole)(pNode->vgId, TAOS_SYNC_ROLE_SYNCING); @@ -380,11 +384,14 @@ void *syncRestoreData(void *param) { (*pNode->notifyRole)(pNode->vgId, nodeRole); nodeSStatus = TAOS_SYNC_STATUS_INIT; - sInfo("%s, sync over, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]); + sInfo("%s, restore data over, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]); taosClose(pPeer->syncFd); syncCloseRecvBuffer(pNode); __sync_fetch_and_sub(&tsSyncNum, 1); + + // The ref is obtained in both the create thread and the current thread, so it is released twice + syncReleasePeer(pPeer); syncReleasePeer(pPeer); return NULL; diff --git a/src/sync/src/syncRetrieve.c b/src/sync/src/syncRetrieve.c index f3e0a6d353..b3be1ace39 100644 --- a/src/sync/src/syncRetrieve.c +++ b/src/sync/src/syncRetrieve.c @@ -194,7 +194,7 @@ static int32_t syncReadOneWalRecord(int32_t sfd, SWalHead *pHead) { } if (ret == 0) { - sTrace("sfd:%d, read to the end of file, ret:%d", sfd, ret); + sDebug("sfd:%d, read to the end of file, ret:%d", sfd, ret); return 0; } @@ -253,7 +253,7 @@ static int32_t syncRetrieveLastWal(SSyncPeer *pPeer, char *name, uint64_t fversi break; } - sTrace("%s, last wal is forwarded, hver:%" PRIu64, pPeer->id, pHead->version); + sDebug("%s, last wal is forwarded, hver:%" PRIu64, pPeer->id, pHead->version); int32_t wsize = code; int32_t ret = taosWriteMsg(pPeer->syncFd, pHead, wsize); @@ -466,10 +466,15 @@ static int32_t syncRetrieveDataStepByStep(SSyncPeer *pPeer) { void *syncRetrieveData(void *param) { int64_t rid = (int64_t)param; SSyncPeer *pPeer = syncAcquirePeer(rid); - if (pPeer == NULL) return NULL; + if (pPeer == NULL) { + sError("failed to retrieve data, invalid peer rid:%" PRId64, rid); + return NULL; + } SSyncNode *pNode = pPeer->pSyncNode; + taosBlockSIGPIPE(); + sInfo("%s, start to retrieve data, sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]); if (pNode->notifyFlowCtrl) (*pNode->notifyFlowCtrl)(pNode->vgId, pPeer->numOfRetrieves); @@ -496,7 +501,11 @@ void *syncRetrieveData(void *param) { pPeer->fileChanged = 0; taosClose(pPeer->syncFd); + + // The ref is obtained in both the create thread and the current thread, so it is released twice + syncReleasePeer(pPeer); syncReleasePeer(pPeer); + sInfo("%s, sync retrieve data over, sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]); return NULL; } diff --git a/src/util/src/tlog.c b/src/util/src/tlog.c index fa6a9db8ec..e0fe51e22a 100644 --- a/src/util/src/tlog.c +++ b/src/util/src/tlog.c @@ -364,7 +364,7 @@ void taosPrintLog(const char *flags, int32_t dflag, const char *format, ...) { ptm = localtime_r(&curTime, &Tm); len = sprintf(buffer, "%02d/%02d %02d:%02d:%02d.%06d 0x%08" PRIx64 " ", ptm->tm_mon + 1, ptm->tm_mday, ptm->tm_hour, - ptm->tm_min, ptm->tm_sec, (int32_t)timeSecs.tv_usec, taosGetPthreadId()); + ptm->tm_min, ptm->tm_sec, (int32_t)timeSecs.tv_usec, taosGetSelfPthreadId()); len += sprintf(buffer + len, "%s", flags); va_start(argpointer, format); @@ -450,7 +450,7 @@ void taosPrintLongString(const char *flags, int32_t dflag, const char *format, . ptm = localtime_r(&curTime, &Tm); len = sprintf(buffer, "%02d/%02d %02d:%02d:%02d.%06d 0x%08" PRIx64 " ", ptm->tm_mon + 1, ptm->tm_mday, ptm->tm_hour, - ptm->tm_min, ptm->tm_sec, (int32_t)timeSecs.tv_usec, taosGetPthreadId()); + ptm->tm_min, ptm->tm_sec, (int32_t)timeSecs.tv_usec, taosGetSelfPthreadId()); len += sprintf(buffer + len, "%s", flags); va_start(argpointer, format); diff --git a/src/util/src/tnote.c b/src/util/src/tnote.c index f2db0b3316..56f2322a7b 100644 --- a/src/util/src/tnote.c +++ b/src/util/src/tnote.c @@ -249,7 +249,7 @@ void taosNotePrint(SNoteObj *pNote, const char *const format, ...) { curTime = timeSecs.tv_sec; ptm = localtime_r(&curTime, &Tm); len = sprintf(buffer, "%02d/%02d %02d:%02d:%02d.%06d 0x%08" PRIx64 " ", ptm->tm_mon + 1, ptm->tm_mday, ptm->tm_hour, - ptm->tm_min, ptm->tm_sec, (int32_t)timeSecs.tv_usec, taosGetPthreadId()); + ptm->tm_min, ptm->tm_sec, (int32_t)timeSecs.tv_usec, taosGetSelfPthreadId()); va_start(argpointer, format); len += vsnprintf(buffer + len, MAX_NOTE_LINE_SIZE - len, format, argpointer); va_end(argpointer); diff --git a/src/util/src/tref.c b/src/util/src/tref.c index 1f83abcb84..3ef45e9b19 100644 --- a/src/util/src/tref.c +++ b/src/util/src/tref.c @@ -447,7 +447,7 @@ static int taosDecRefCount(int rsetId, int64_t rid, int remove) { } static void taosLockList(int64_t *lockedBy) { - int64_t tid = taosGetPthreadId(); + int64_t tid = taosGetSelfPthreadId(); int i = 0; while (atomic_val_compare_exchange_64(lockedBy, 0, tid) != 0) { if (++i % 100 == 0) { @@ -457,7 +457,7 @@ static void taosLockList(int64_t *lockedBy) { } static void taosUnlockList(int64_t *lockedBy) { - int64_t tid = taosGetPthreadId(); + int64_t tid = taosGetSelfPthreadId(); if (atomic_val_compare_exchange_64(lockedBy, tid, 0) != tid) { assert(false); } diff --git a/src/util/src/ttimer.c b/src/util/src/ttimer.c index 3c0462e980..015c687b3d 100644 --- a/src/util/src/ttimer.c +++ b/src/util/src/ttimer.c @@ -119,7 +119,7 @@ static void timerDecRef(tmr_obj_t* timer) { } static void lockTimerList(timer_list_t* list) { - int64_t tid = taosGetPthreadId(); + int64_t tid = taosGetSelfPthreadId(); int i = 0; while (atomic_val_compare_exchange_64(&(list->lockedBy), 0, tid) != 0) { if (++i % 1000 == 0) { @@ -129,7 +129,7 @@ static void lockTimerList(timer_list_t* list) { } static void unlockTimerList(timer_list_t* list) { - int64_t tid = taosGetPthreadId(); + int64_t tid = taosGetSelfPthreadId(); if (atomic_val_compare_exchange_64(&(list->lockedBy), tid, 0) != tid) { assert(false); tmrError("%" PRId64 " trying to unlock a timer list not locked by current thread.", tid); @@ -257,7 +257,7 @@ static bool removeFromWheel(tmr_obj_t* timer) { static void processExpiredTimer(void* handle, void* arg) { tmr_obj_t* timer = (tmr_obj_t*)handle; - timer->executedBy = taosGetPthreadId(); + timer->executedBy = taosGetSelfPthreadId(); uint8_t state = atomic_val_compare_exchange_8(&timer->state, TIMER_STATE_WAITING, TIMER_STATE_EXPIRED); if (state == TIMER_STATE_WAITING) { const char* fmt = "%s timer[id=%" PRIuPTR ", fp=%p, param=%p] execution start."; @@ -406,7 +406,7 @@ static bool doStopTimer(tmr_obj_t* timer, uint8_t state) { return false; } - if (timer->executedBy == taosGetPthreadId()) { + if (timer->executedBy == taosGetSelfPthreadId()) { // taosTmrReset is called in the timer callback, should do nothing in this // case to avoid dead lock. note taosTmrReset must be the last statement // of the callback funtion, will be a bug otherwise. diff --git a/tests/script/unique/mnode/mgmt20.sim b/tests/script/unique/mnode/mgmt20.sim index 6eb35b67ac..ee9c2b914f 100644 --- a/tests/script/unique/mnode/mgmt20.sim +++ b/tests/script/unique/mnode/mgmt20.sim @@ -50,6 +50,24 @@ $d1_first = $rows sql select * from log.dn2 $d2_first = $rows +$x = 0 +show4: + $x = $x + 1 + sleep 1000 + if $x == 20 then + return -1 + endi + +sql show mnodes +print dnode1 ==> $data2_1 +print dnode2 ==> $data2_2 +if $data2_1 != master then + goto show4 +endi +if $data2_2 != slave then + goto show4 +endi + sleep 3000 sql select * from log.dn1 $d1_second = $rows -- GitLab