diff --git a/documentation20/webdocs/markdowndocs/architecture-ch.md b/documentation20/webdocs/markdowndocs/architecture-ch.md index bfe3b55bd22fa9ec44e9305ed0924ef110ff7b15..7ab4b5d096b4676b205fa57175889f780df2cb45 100644 --- a/documentation20/webdocs/markdowndocs/architecture-ch.md +++ b/documentation20/webdocs/markdowndocs/architecture-ch.md @@ -82,7 +82,7 @@ TDengine 分布式架构的逻辑结构图如下: ### 节点之间的通讯 **通讯方式:**TDengine系统的各个节点之间的通讯是通过TCP/UDP进行的。因为考虑到物联网场景,数据写入的包一般不大,因此TDengine 除采用TCP做传输之外,还采用UDP方式,因为UDP 更加高效,而且不受连接数的限制。TDengine实现了自己的超时、重传、确认等机制,以确保UDP的可靠传输。对于数据量不到15K的数据包,采取UDP的方式进行传输,超过15K的,或者是查询类的操作,自动采取TCP的方式进行传输。同时,TDengine根据配置和数据包,会自动对数据进行压缩/解压缩,数字签名/认证等处理。对于数据节点之间的数据复制,只采用TCP方式进行数据传输。 -**FQDN配置**:一个数据节点有一个或多个FQDN,可以在系统配置文件taos.cfg通过选项“fqdn"进行指定,如果没有指定,系统将自动获取FQDN。如果节点没有配置FQDN,可以直接使用IP地址作为FQDN,但不建议使用,因为IP地址可变,一旦变化,将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。 +**FQDN配置**:一个数据节点有一个或多个FQDN,可以在系统配置文件taos.cfg通过参数“fqdn"进行指定,如果没有指定,系统将自动获取FQDN。如果节点没有配置FQDN,可以直接将该节点的配置参数fqdn设置为它的IP地址。但不建议使用IP,因为IP地址可变,一旦变化,将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。采用FQDN,需要保证DNS服务正常工作,或者在节点以及应用所在的节点配置好hosts文件。 **端口配置:**一个数据节点对外的端口由TDengine的系统配置参数serverPort决定,对集群内部通讯的端口是serverPort+5。集群内数据节点之间的数据复制操作还占有一个TCP端口,是serverPort+10. 为支持多线程高效的处理UDP数据,每个对内和对外的UDP链接,都需要占用5个连续的端口。因此一个数据节点总的端口范围为serverPort到serverPort + 10,总共11个TCP/UDP端口。使用时,需要确保防火墙将这些端口打开。每个数据节点可以配置不同的serverPort。 diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index f38677f7acd683125f25a5d7e26ec3a8a84c66ae..1cbb3ead9cee61c44f4b311f730f8f23458e72af 100755 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -705,6 +705,7 @@ function install_TDengine() { echo echo -e "\033[44;32;1mTDengine client is installed successfully!${NC}" fi + touch ~/.taos_history rm -rf $(tar -tf taos.tar.gz) } diff --git a/src/client/src/tscProfile.c b/src/client/src/tscProfile.c index f48e7b7691536cb8f9a5b8561e95b8a5a0f0fefc..6ff97e9d00ad9cbf8cd6fdc92713b111a497c321 100644 --- a/src/client/src/tscProfile.c +++ b/src/client/src/tscProfile.c @@ -285,9 +285,9 @@ void tscKillConnection(STscObj *pObj) { SSqlObj *pSql = pObj->sqlList; while (pSql) { - //taosStopRpcConn(pSql->thandle); pSql = pSql->next; } + SSqlStream *pStream = pObj->streamList; while (pStream) { diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 9282fa74fbab3e32f8332b69d3800d84c37e5dd6..4f179adf72320c971dc74ccf28b12f4d022a17fc 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -226,13 +226,17 @@ int tscSendMsgToServer(SSqlObj *pSql) { .handle = &pSql->pRpcCtx, .code = 0 }; - // NOTE: the rpc context should be acquired before sending data to server. // Otherwise, the pSql object may have been released already during the response function, which is // processMsgFromServer function. In the meanwhile, the assignment of the rpc context to sql object will absolutely // cause crash. - rpcSendRequest(pObj->pDnodeConn, &pSql->epSet, &rpcMsg); - return TSDB_CODE_SUCCESS; + if (pObj != NULL && pObj->signature == pObj) { + rpcSendRequest(pObj->pDnodeConn, &pSql->epSet, &rpcMsg); + return TSDB_CODE_SUCCESS; + } else { + //pObj->signature has been reset by other thread, ignore concurrency problem + return TSDB_CODE_TSC_CONN_KILLED; + } } void tscProcessMsgFromServer(SRpcMsg *rpcMsg, SRpcEpSet *pEpSet) { diff --git a/src/dnode/src/dnodeSystem.c b/src/dnode/src/dnodeSystem.c index 6f32bc0f7ad3e7a5e1704cc24cf54ac62dbfd694..56316e9619344243ab2e5fa46e0acfc6e0331e8a 100644 --- a/src/dnode/src/dnodeSystem.c +++ b/src/dnode/src/dnodeSystem.c @@ -119,11 +119,8 @@ int32_t main(int32_t argc, char *argv[]) { syslog(LOG_INFO, "Started TDengine service successfully."); - for (int res = tsem_wait(&exitSem); res != 0; res = tsem_wait(&exitSem)) { - if (res != EINTR) { - syslog(LOG_ERR, "failed to wait exit semphore: %d", res); - break; - } + if (tsem_wait(&exitSem) != 0) { + syslog(LOG_ERR, "failed to wait exit semphore: %s", strerror(errno)); } dnodeCleanUpSystem(); diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index df69d1d19a8910622565dc1ebfd6a4829b79de7b..e7c3d5e4cce154b84c03dadc3f4f0a7c7f1ac69d 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -97,6 +97,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TSC_APP_ERROR, 0, 0x0211, "Applicatio TAOS_DEFINE_ERROR(TSDB_CODE_TSC_ACTION_IN_PROGRESS, 0, 0x0212, "Action in progress") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_DISCONNECTED, 0, 0x0213, "Disconnected from service") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_NO_WRITE_AUTH, 0, 0x0214, "No write permission") +TAOS_DEFINE_ERROR(TSDB_CODE_TSC_CONN_KILLED, 0, 0x0215, "Connection killed") // mnode TAOS_DEFINE_ERROR(TSDB_CODE_MND_MSG_NOT_PROCESSED, 0, 0x0300, "Message not processed") diff --git a/src/mnode/src/mnodeProfile.c b/src/mnode/src/mnodeProfile.c index 34147f2e4b3599c5b8357ba510eaab967352c007..06f992c26a4b2fd6eec19abb9743e4add2536000 100644 --- a/src/mnode/src/mnodeProfile.c +++ b/src/mnode/src/mnodeProfile.c @@ -100,7 +100,7 @@ SConnObj *mnodeCreateConn(char *user, uint32_t ip, uint16_t port) { }; tstrncpy(connObj.user, user, sizeof(connObj.user)); - SConnObj *pConn = taosCachePut(tsMnodeConnCache, &connId, sizeof(int32_t), &connObj, sizeof(connObj), CONN_KEEP_TIME*1000); + SConnObj *pConn = taosCachePut(tsMnodeConnCache, &connId, sizeof(int32_t), &connObj, sizeof(connObj), CONN_KEEP_TIME * 1000); mDebug("connId:%d, is created, user:%s ip:%s:%u", connId, user, taosIpStr(ip), port); return pConn; diff --git a/src/os/inc/osSemphone.h b/src/os/inc/osSemphone.h index fd88d2d7981a4e0f9dc397e73bf242eb7e059cf7..4280b458a65b23b3aa680c6ed46f61beb5c48948 100644 --- a/src/os/inc/osSemphone.h +++ b/src/os/inc/osSemphone.h @@ -23,7 +23,7 @@ extern "C" { #ifndef TAOS_OS_FUNC_SEMPHONE #define tsem_t sem_t #define tsem_init sem_init - #define tsem_wait sem_wait + int tsem_wait(tsem_t* sem); #define tsem_post sem_post #define tsem_destroy sem_destroy #endif diff --git a/src/os/src/detail/osSemphone.c b/src/os/src/detail/osSemphone.c index 74f8859029f95c67a2f298a0dbfa983978b4b78f..b91888845edd9e98315994eaada9eca245aacb24 100644 --- a/src/os/src/detail/osSemphone.c +++ b/src/os/src/detail/osSemphone.c @@ -16,6 +16,18 @@ #define _DEFAULT_SOURCE #include "os.h" +#ifndef TAOS_OS_FUNC_SEMPHONE + +int tsem_wait(tsem_t* sem) { + int ret = 0; + do { + ret = sem_wait(sem); + } while (ret != 0 && errno == EINTR); + return ret; +} + +#endif + #ifndef TAOS_OS_FUNC_SEMPHONE_PTHREAD bool taosCheckPthreadValid(pthread_t thread) { return thread != 0; } diff --git a/src/sync/src/tarbitrator.c b/src/sync/src/tarbitrator.c index 4e55cc475f0b48f9786debf75532af16a48db787..40438ab1d2172583ace9d12f08b774b39c1b0bfe 100644 --- a/src/sync/src/tarbitrator.c +++ b/src/sync/src/tarbitrator.c @@ -99,9 +99,7 @@ int main(int argc, char *argv[]) { sInfo("TAOS arbitrator: %s:%d is running", tsNodeFqdn, tsArbitratorPort); - for (int res = tsem_wait(&tsArbSem); res != 0; res = tsem_wait(&tsArbSem)) { - if (res != EINTR) break; - } + tsem_wait(&tsArbSem); taosCloseTcpThreadPool(tsArbTcpPool); sInfo("TAOS arbitrator is shut down\n"); diff --git a/src/util/src/tsched.c b/src/util/src/tsched.c index cf7f5c10d4c369211baddcf43e1ec0d10fea82b1..f014dd0fab5494bd85f197e2c79fac53359e8edf 100644 --- a/src/util/src/tsched.c +++ b/src/util/src/tsched.c @@ -123,11 +123,6 @@ void *taosProcessSchedQueue(void *param) { while (1) { if (tsem_wait(&pSched->fullSem) != 0) { - if (errno == EINTR) { - /* sem_wait is interrupted by interrupt, ignore and continue */ - uDebug("wait %s fullSem was interrupted", pSched->label); - continue; - } uError("wait %s fullSem failed(%s)", pSched->label, strerror(errno)); } if (pSched->stop) { @@ -163,12 +158,8 @@ int taosScheduleTask(void *qhandle, SSchedMsg *pMsg) { return 0; } - while (tsem_wait(&pSched->emptySem) != 0) { - if (errno != EINTR) { - uError("wait %s emptySem failed(%s)", pSched->label, strerror(errno)); - break; - } - uDebug("wait %s emptySem was interrupted", pSched->label); + if (tsem_wait(&pSched->emptySem) != 0) { + uError("wait %s emptySem failed(%s)", pSched->label, strerror(errno)); } if (pthread_mutex_lock(&pSched->queueMutex) != 0)