From a48c13aaf1427072013b5d4fe758ab23617e614e Mon Sep 17 00:00:00 2001 From: hjxilinx Date: Mon, 25 Nov 2019 18:06:37 +0800 Subject: [PATCH] [tbase-1225] --- src/client/src/tscServer.c | 26 +++++++++++++++++++------- src/util/src/tglobalcfg.c | 2 +- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 40db4d934f..6a66b860d7 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -59,6 +59,22 @@ void tscPrintMgmtIp() { } #endif +/* + * For each management node, try at least twice to tolerate poor network conditions. + * If the client first connects to a non-management node, the first attempt may fail because of + * poor network quality, and the second attempt then receives a response carrying a redirection command. + * The redirected retry would not be executed, since only *two* retries are allowed when the cluster has a single management node. + * Therefore, the retry limit is multiplied by a factor of 2 to fix this problem. 
+ */ +static int32_t tscGetMgmtConnMaxRetryTimes() { + int32_t factor = 2; +#ifdef CLUSTER + return tscMgmtIpList.numOfIps * factor; +#else + return 1*factor; +#endif +} + void tscProcessHeartBeatRsp(void *param, TAOS_RES *tres, int code) { STscObj *pObj = (STscObj *)param; if (pObj == NULL) return; @@ -134,18 +150,17 @@ void tscProcessActivityTimer(void *handle, void *tmrId) { tscProcessSql(pObj->pHb); } -//TODO HANDLE error from mgmt void tscGetConnToMgmt(SSqlObj *pSql, uint8_t *pCode) { STscObj *pTscObj = pSql->pTscObj; #ifdef CLUSTER - if (pSql->retry < tscMgmtIpList.numOfIps) { + if (pSql->retry < tscGetMgmtConnMaxRetryTimes()) { *pCode = 0; pSql->retry++; pSql->index = pSql->index % tscMgmtIpList.numOfIps; if (pSql->cmd.command > TSDB_SQL_READ && pSql->index == 0) pSql->index = 1; void *thandle = taosGetConnFromCache(tscConnCache, tscMgmtIpList.ip[pSql->index], TSC_MGMT_VNODE, pTscObj->user); #else - if (pSql->retry < 1) { + if (pSql->retry < tscGetMgmtConnMaxRetryTimes()) { *pCode = 0; pSql->retry++; void *thandle = taosGetConnFromCache(tscConnCache, tsServerIp, TSC_MGMT_VNODE, pTscObj->user); @@ -444,16 +459,13 @@ void *tscProcessMsgFromServer(char *msg, void *ahandle, void *thandle) { } } else { uint16_t rspCode = pMsg->content[0]; -#ifdef CLUSTER +#ifdef CLUSTER if (rspCode == TSDB_CODE_REDIRECT) { tscTrace("%p it shall be redirected!", pSql); taosAddConnIntoCache(tscConnCache, thandle, pSql->ip, pSql->vnode, pObj->user); pSql->thandle = NULL; - - // reset the retry times for a new mgmt node - pSql->retry = 0; if (pCmd->command > TSDB_SQL_MGMT) { tscProcessMgmtRedirect(pSql, pMsg->content + 1); diff --git a/src/util/src/tglobalcfg.c b/src/util/src/tglobalcfg.c index 6991e6b8b6..0dd0e4e2ba 100644 --- a/src/util/src/tglobalcfg.c +++ b/src/util/src/tglobalcfg.c @@ -510,7 +510,7 @@ static void doInitGlobalConfig() { 0, TSDB_MAX_VNODES, 0, TSDB_CFG_UTYPE_NONE); tsInitConfigOption(cfg++, "tables", &tsSessionsPerVnode, TSDB_CFG_VTYPE_INT, TSDB_CFG_CTYPE_B_CONFIG 
| TSDB_CFG_CTYPE_B_SHOW, - 4, 220000, 0, TSDB_CFG_UTYPE_NONE); + TSDB_MIN_TABLES_PER_VNODE, TSDB_MAX_TABLES_PER_VNODE, 0, TSDB_CFG_UTYPE_NONE); tsInitConfigOption(cfg++, "cache", &tsCacheBlockSize, TSDB_CFG_VTYPE_INT, TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_SHOW, 100, 1048576, 0, TSDB_CFG_UTYPE_BYTE); -- GitLab