Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
189cdc4f
T
TDengine
项目概览
taosdata
/
TDengine
大约 1 年 前同步成功
通知
1184
Star
22015
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
189cdc4f
编写于
11月 30, 2022
作者:
S
Shengliang Guan
提交者:
GitHub
11月 30, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #18533 from taosdata/enh/clientRetry
enh: refactor retry
上级
0357ee12
b7cba5d6
变更
21
隐藏空白更改
内联
并排
Showing
21 changed file
with
413 addition
and
87 deletion
+413
-87
include/common/tglobal.h
include/common/tglobal.h
+4
-0
include/libs/qcom/query.h
include/libs/qcom/query.h
+9
-2
include/libs/transport/trpc.h
include/libs/transport/trpc.h
+5
-0
include/util/taoserror.h
include/util/taoserror.h
+2
-0
source/client/src/clientEnv.c
source/client/src/clientEnv.c
+4
-0
source/common/src/tglobal.c
source/common/src/tglobal.c
+10
-2
source/dnode/mgmt/node_mgmt/src/dmTransport.c
source/dnode/mgmt/node_mgmt/src/dmTransport.c
+5
-0
source/dnode/vnode/src/vnd/vnodeSync.c
source/dnode/vnode/src/vnd/vnodeSync.c
+1
-1
source/libs/scheduler/inc/schInt.h
source/libs/scheduler/inc/schInt.h
+22
-0
source/libs/scheduler/src/schRemote.c
source/libs/scheduler/src/schRemote.c
+8
-3
source/libs/scheduler/src/schTask.c
source/libs/scheduler/src/schTask.c
+145
-22
source/libs/scheduler/src/schUtil.c
source/libs/scheduler/src/schUtil.c
+21
-0
source/libs/scheduler/src/scheduler.c
source/libs/scheduler/src/scheduler.c
+6
-0
source/libs/transport/inc/transComm.h
source/libs/transport/inc/transComm.h
+11
-3
source/libs/transport/inc/transportInt.h
source/libs/transport/inc/transportInt.h
+5
-0
source/libs/transport/src/trans.c
source/libs/transport/src/trans.c
+5
-0
source/libs/transport/src/transCli.c
source/libs/transport/src/transCli.c
+137
-43
source/libs/transport/src/transSvr.c
source/libs/transport/src/transSvr.c
+9
-9
source/util/src/terror.c
source/util/src/terror.c
+2
-0
tests/parallel_test/cases.task
tests/parallel_test/cases.task
+1
-1
tests/system-test/0-others/sysinfo.py
tests/system-test/0-others/sysinfo.py
+1
-1
未找到文件。
include/common/tglobal.h
浏览文件 @
189cdc4f
...
...
@@ -92,6 +92,10 @@ extern int32_t tsQueryNodeChunkSize;
extern
bool
tsQueryUseNodeAllocator
;
extern
bool
tsKeepColumnName
;
extern
bool
tsEnableQueryHb
;
extern
int32_t
tsRedirectPeriod
;
extern
int32_t
tsRedirectFactor
;
extern
int32_t
tsRedirectMaxPeriod
;
extern
int32_t
tsMaxRetryWaitTime
;
// client
extern
int32_t
tsMinSlidingTime
;
...
...
include/libs/qcom/query.h
浏览文件 @
189cdc4f
...
...
@@ -259,9 +259,15 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t
#define NEED_CLIENT_HANDLE_ERROR(_code) \
(NEED_CLIENT_RM_TBLMETA_ERROR(_code) || NEED_CLIENT_REFRESH_VG_ERROR(_code) || \
NEED_CLIENT_REFRESH_TBLMETA_ERROR(_code))
#define SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR)
#define SYNC_SELF_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR)
#define SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) (false) // used later
#define NEED_REDIRECT_ERROR(_code) \
((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || \
(_code) == TSDB_CODE_NODE_NOT_DEPLOYED || (_code) == TSDB_CODE_SYN_NOT_LEADER || \
(_code) == TSDB_CODE_NODE_NOT_DEPLOYED || SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || \
SYNC_SELF_LEADER_REDIRECT_ERROR(_code) || SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) || \
(_code) == TSDB_CODE_APP_NOT_READY || (_code) == TSDB_CODE_RPC_BROKEN_LINK)
#define NEED_CLIENT_RM_TBLMETA_REQ(_type) \
...
...
@@ -270,7 +276,8 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t
#define NEED_SCHEDULER_REDIRECT_ERROR(_code) \
((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_NODE_NOT_DEPLOYED || \
(_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_APP_NOT_READY)
SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || SYNC_SELF_LEADER_REDIRECT_ERROR(_code) || \
SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) || (_code) == TSDB_CODE_APP_NOT_READY)
#define REQUEST_TOTAL_EXEC_TIMES 2
...
...
include/libs/transport/trpc.h
浏览文件 @
189cdc4f
...
...
@@ -85,6 +85,11 @@ typedef struct SRpcInit {
int32_t
retryLimit
;
// retry limit
int32_t
retryInterval
;
// retry interval ms
int32_t
retryMinInterval
;
// retry init interval
int32_t
retryStepFactor
;
// retry interval factor
int32_t
retryMaxInterval
;
// retry max interval
int64_t
retryMaxTimouet
;
int32_t
compressSize
;
// -1: no compress, 0 : all data compressed, size: compress data if larger than size
int8_t
encryption
;
// encrypt or not
...
...
include/util/taoserror.h
浏览文件 @
189cdc4f
...
...
@@ -92,6 +92,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_NO_AVAIL_DISK TAOS_DEF_ERROR_CODE(0, 0x0129)
#define TSDB_CODE_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x012A)
#define TSDB_CODE_NO_DISKSPACE TAOS_DEF_ERROR_CODE(0, 0x012B)
#define TSDB_CODE_TIMEOUT_ERROR TAOS_DEF_ERROR_CODE(0, 0x012C)
//client
#define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200)
...
...
@@ -415,6 +416,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_SYN_PROPOSE_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0911)
#define TSDB_CODE_SYN_STANDBY_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0912)
#define TSDB_CODE_SYN_BATCH_ERROR TAOS_DEF_ERROR_CODE(0, 0x0913)
#define TSDB_CODE_SYN_RESTORING TAOS_DEF_ERROR_CODE(0, 0x0914)
#define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF)
// tq
...
...
source/client/src/clientEnv.c
浏览文件 @
189cdc4f
...
...
@@ -148,6 +148,10 @@ void *openTransporter(const char *user, const char *auth, int32_t numOfThread) {
rpcInit
.
dfp
=
destroyAhandle
;
rpcInit
.
retryLimit
=
tsRpcRetryLimit
;
rpcInit
.
retryInterval
=
tsRpcRetryInterval
;
rpcInit
.
retryMinInterval
=
tsRedirectPeriod
;
rpcInit
.
retryStepFactor
=
tsRedirectFactor
;
rpcInit
.
retryMaxInterval
=
tsRedirectMaxPeriod
;
rpcInit
.
retryMaxTimouet
=
tsMaxRetryWaitTime
;
void
*
pDnodeConn
=
rpcOpen
(
&
rpcInit
);
if
(
pDnodeConn
==
NULL
)
{
...
...
source/common/src/tglobal.c
浏览文件 @
189cdc4f
...
...
@@ -16,9 +16,9 @@
#define _DEFAULT_SOURCE
#include "tglobal.h"
#include "tconfig.h"
#include "tmisce.h"
#include "tgrant.h"
#include "tlog.h"
#include "tmisce.h"
GRANT_CFG_DECLARE
;
...
...
@@ -86,6 +86,10 @@ bool tsQueryPlannerTrace = false;
int32_t
tsQueryNodeChunkSize
=
32
*
1024
;
bool
tsQueryUseNodeAllocator
=
true
;
bool
tsKeepColumnName
=
false
;
int32_t
tsRedirectPeriod
=
10
;
int32_t
tsRedirectFactor
=
2
;
int32_t
tsRedirectMaxPeriod
=
1000
;
int32_t
tsMaxRetryWaitTime
=
10000
;
/*
* denote if the server needs to compress response message at the application layer to client, including query rsp,
...
...
@@ -120,7 +124,7 @@ int32_t tsMinIntervalTime = 1;
int32_t
tsMaxMemUsedByInsert
=
1024
;
float
tsSelectivityRatio
=
1
.
0
;
int32_t
tsTagFilterResCacheSize
=
1024
*
10
;
int32_t
tsTagFilterResCacheSize
=
1024
*
10
;
// the maximum allowed query buffer size during query processing for each data node.
// -1 no limit (default)
...
...
@@ -305,6 +309,7 @@ static int32_t taosAddClientCfg(SConfig *pCfg) {
if
(
cfgAddInt32
(
pCfg
,
"maxMemUsedByInsert"
,
tsMaxMemUsedByInsert
,
1
,
INT32_MAX
,
true
)
!=
0
)
return
-
1
;
if
(
cfgAddInt32
(
pCfg
,
"rpcRetryLimit"
,
tsRpcRetryLimit
,
1
,
100000
,
0
)
!=
0
)
return
-
1
;
if
(
cfgAddInt32
(
pCfg
,
"rpcRetryInterval"
,
tsRpcRetryInterval
,
1
,
100000
,
0
)
!=
0
)
return
-
1
;
if
(
cfgAddInt32
(
pCfg
,
"maxRetryWaitTime"
,
tsMaxRetryWaitTime
,
0
,
86400000
,
0
)
!=
0
)
return
-
1
;
tsNumOfTaskQueueThreads
=
tsNumOfCores
/
2
;
tsNumOfTaskQueueThreads
=
TMAX
(
tsNumOfTaskQueueThreads
,
4
);
...
...
@@ -659,6 +664,7 @@ static int32_t taosSetClientCfg(SConfig *pCfg) {
tsRpcRetryLimit
=
cfgGetItem
(
pCfg
,
"rpcRetryLimit"
)
->
i32
;
tsRpcRetryInterval
=
cfgGetItem
(
pCfg
,
"rpcRetryInterval"
)
->
i32
;
tsMaxRetryWaitTime
=
cfgGetItem
(
pCfg
,
"maxRetryWaitTime"
)
->
i32
;
return
0
;
}
...
...
@@ -874,6 +880,8 @@ int32_t taosSetCfg(SConfig *pCfg, char *name) {
tsMaxNumOfDistinctResults
=
cfgGetItem
(
pCfg
,
"maxNumOfDistinctRes"
)
->
i32
;
}
else
if
(
strcasecmp
(
"maxMemUsedByInsert"
,
name
)
==
0
)
{
tsMaxMemUsedByInsert
=
cfgGetItem
(
pCfg
,
"maxMemUsedByInsert"
)
->
i32
;
}
else
if
(
strcasecmp
(
"maxRetryWaitTime"
,
name
)
==
0
)
{
tsMaxRetryWaitTime
=
cfgGetItem
(
pCfg
,
"maxRetryWaitTime"
)
->
i32
;
}
break
;
}
...
...
source/dnode/mgmt/node_mgmt/src/dmTransport.c
浏览文件 @
189cdc4f
...
...
@@ -258,8 +258,13 @@ int32_t dmInitClient(SDnode *pDnode) {
rpcInit
.
parent
=
pDnode
;
rpcInit
.
rfp
=
rpcRfp
;
rpcInit
.
compressSize
=
tsCompressMsgSize
;
rpcInit
.
retryLimit
=
tsRpcRetryLimit
;
rpcInit
.
retryInterval
=
tsRpcRetryInterval
;
rpcInit
.
retryMinInterval
=
tsRedirectPeriod
;
rpcInit
.
retryStepFactor
=
tsRedirectFactor
;
rpcInit
.
retryMaxInterval
=
tsRedirectMaxPeriod
;
rpcInit
.
retryMaxTimouet
=
tsMaxRetryWaitTime
;
pTrans
->
clientRpc
=
rpcOpen
(
&
rpcInit
);
if
(
pTrans
->
clientRpc
==
NULL
)
{
...
...
source/dnode/vnode/src/vnd/vnodeSync.c
浏览文件 @
189cdc4f
...
...
@@ -66,7 +66,7 @@ void vnodeRedirectRpcMsg(SVnode *pVnode, SRpcMsg *pMsg) {
}
pMsg
->
info
.
hasEpSet
=
1
;
SRpcMsg
rsp
=
{.
code
=
TSDB_CODE_
RPC_REDIRECT
,
.
info
=
pMsg
->
info
,
.
msgType
=
pMsg
->
msgType
+
1
};
SRpcMsg
rsp
=
{.
code
=
TSDB_CODE_
SYN_NOT_LEADER
,
.
info
=
pMsg
->
info
,
.
msgType
=
pMsg
->
msgType
+
1
};
tmsgSendRedirectRsp
(
&
rsp
,
&
newEpSet
);
}
...
...
source/libs/scheduler/inc/schInt.h
浏览文件 @
189cdc4f
...
...
@@ -27,6 +27,7 @@ extern "C" {
#include "tarray.h"
#include "thash.h"
#include "trpc.h"
#include "ttimer.h"
enum
{
SCH_READ
=
1
,
...
...
@@ -146,6 +147,7 @@ typedef struct SSchedulerMgmt {
int32_t
jobRef
;
int32_t
jobNum
;
SSchStat
stat
;
void
*
timer
;
SRWLatch
hbLock
;
SHashObj
*
hbConnections
;
void
*
queryMgmt
;
...
...
@@ -202,12 +204,30 @@ typedef struct SSchTaskProfile {
int64_t
endTs
;
}
SSchTaskProfile
;
typedef
struct
SSchRedirectCtx
{
int32_t
periodMs
;
bool
inRedirect
;
int32_t
totalTimes
;
int32_t
roundTotal
;
int32_t
roundTimes
;
// retry times in current round
int64_t
startTs
;
}
SSchRedirectCtx
;
typedef
struct
SSchTimerParam
{
int64_t
rId
;
uint64_t
queryId
;
uint64_t
taskId
;
}
SSchTimerParam
;
typedef
struct
SSchTask
{
uint64_t
taskId
;
// task id
SRWLatch
lock
;
// task reentrant lock
int32_t
maxExecTimes
;
// task max exec times
int32_t
maxRetryTimes
;
// task max retry times
int32_t
retryTimes
;
// task retry times
int32_t
delayExecMs
;
// task execution delay time
tmr_h
delayTimer
;
// task delay execution timer
SSchRedirectCtx
redirectCtx
;
// task redirect context
bool
waitRetry
;
// wait for retry
int32_t
execId
;
// task current execute index
SSchLevel
*
level
;
// level
...
...
@@ -488,6 +508,7 @@ extern SSchedulerMgmt schMgmt;
void
schDeregisterTaskHb
(
SSchJob
*
pJob
,
SSchTask
*
pTask
);
void
schCleanClusterHb
(
void
*
pTrans
);
int32_t
schLaunchTask
(
SSchJob
*
job
,
SSchTask
*
task
);
int32_t
schDelayLaunchTask
(
SSchJob
*
pJob
,
SSchTask
*
pTask
);
int32_t
schBuildAndSendMsg
(
SSchJob
*
job
,
SSchTask
*
task
,
SQueryNodeAddr
*
addr
,
int32_t
msgType
);
SSchJob
*
schAcquireJob
(
int64_t
refId
);
int32_t
schReleaseJob
(
int64_t
refId
);
...
...
@@ -529,6 +550,7 @@ int32_t schJobFetchRows(SSchJob *pJob);
int32_t
schJobFetchRowsA
(
SSchJob
*
pJob
);
int32_t
schUpdateTaskHandle
(
SSchJob
*
pJob
,
SSchTask
*
pTask
,
bool
dropExecNode
,
void
*
handle
,
int32_t
execId
);
int32_t
schProcessOnTaskStatusRsp
(
SQueryNodeEpId
*
pEpId
,
SArray
*
pStatusList
);
char
*
schDumpEpSet
(
SEpSet
*
pEpSet
);
char
*
schGetOpStr
(
SCH_OP_TYPE
type
);
int32_t
schBeginOperation
(
SSchJob
*
pJob
,
SCH_OP_TYPE
type
,
bool
sync
);
int32_t
schInitJob
(
int64_t
*
pJobId
,
SSchedulerReq
*
pReq
);
...
...
source/libs/scheduler/src/schRemote.c
浏览文件 @
189cdc4f
...
...
@@ -887,9 +887,14 @@ int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, SSchTrans *trans, SQuery
SCH_ERR_JRET
(
schGenerateCallBackInfo
(
pJob
,
pTask
,
msg
,
msgSize
,
msgType
,
trans
,
isHb
,
&
pMsgSendInfo
));
SCH_ERR_JRET
(
schUpdateSendTargetInfo
(
pMsgSendInfo
,
addr
,
pTask
));
qDebug
(
"start to send %s msg to node[%d,%s,%d], pTrans:%p, pHandle:%p"
,
TMSG_INFO
(
msgType
),
addr
->
nodeId
,
epSet
->
eps
[
epSet
->
inUse
].
fqdn
,
epSet
->
eps
[
epSet
->
inUse
].
port
,
trans
->
pTrans
,
trans
->
pHandle
);
if
(
pJob
&&
pTask
)
{
SCH_TASK_DLOG
(
"start to send %s msg to node[%d,%s,%d], pTrans:%p, pHandle:%p"
,
TMSG_INFO
(
msgType
),
addr
->
nodeId
,
epSet
->
eps
[
epSet
->
inUse
].
fqdn
,
epSet
->
eps
[
epSet
->
inUse
].
port
,
trans
->
pTrans
,
trans
->
pHandle
);
}
else
{
qDebug
(
"start to send %s msg to node[%d,%s,%d], pTrans:%p, pHandle:%p"
,
TMSG_INFO
(
msgType
),
addr
->
nodeId
,
epSet
->
eps
[
epSet
->
inUse
].
fqdn
,
epSet
->
eps
[
epSet
->
inUse
].
port
,
trans
->
pTrans
,
trans
->
pHandle
);
}
if
(
pTask
)
{
pTask
->
lastMsgType
=
msgType
;
}
...
...
source/libs/scheduler/src/schTask.c
浏览文件 @
189cdc4f
...
...
@@ -340,6 +340,70 @@ int32_t schRescheduleTask(SSchJob *pJob, SSchTask *pTask) {
return
TSDB_CODE_SUCCESS
;
}
int32_t
schChkUpdateRedirectCtx
(
SSchJob
*
pJob
,
SSchTask
*
pTask
,
SEpSet
*
pEpSet
)
{
SSchRedirectCtx
*
pCtx
=
&
pTask
->
redirectCtx
;
if
(
!
pCtx
->
inRedirect
)
{
pCtx
->
inRedirect
=
true
;
pCtx
->
periodMs
=
tsRedirectPeriod
;
pCtx
->
startTs
=
taosGetTimestampMs
();
if
(
SCH_IS_DATA_BIND_TASK
(
pTask
))
{
if
(
pEpSet
)
{
pCtx
->
roundTotal
=
pEpSet
->
numOfEps
;
}
else
{
SQueryNodeAddr
*
pAddr
=
taosArrayGet
(
pTask
->
candidateAddrs
,
0
);
pCtx
->
roundTotal
=
pAddr
->
epSet
.
numOfEps
;
}
}
else
{
pCtx
->
roundTotal
=
1
;
}
goto
_return
;
}
pCtx
->
totalTimes
++
;
if
(
SCH_IS_DATA_BIND_TASK
(
pTask
)
&&
pEpSet
)
{
pCtx
->
roundTotal
=
pEpSet
->
numOfEps
;
pCtx
->
roundTimes
=
0
;
pTask
->
delayExecMs
=
0
;
goto
_return
;
}
pCtx
->
roundTimes
++
;
if
(
pCtx
->
roundTimes
>=
pCtx
->
roundTotal
)
{
int64_t
nowTs
=
taosGetTimestampMs
();
int64_t
lastTime
=
nowTs
-
pCtx
->
startTs
;
if
(
lastTime
>
tsMaxRetryWaitTime
)
{
SCH_TASK_DLOG
(
"task no more redirect retry since timeout, now:%"
PRId64
", start:%"
PRId64
", max:%d, total:%d"
,
nowTs
,
pCtx
->
startTs
,
tsMaxRetryWaitTime
,
pCtx
->
totalTimes
);
SCH_ERR_RET
(
TSDB_CODE_TIMEOUT_ERROR
);
}
pCtx
->
periodMs
*=
tsRedirectFactor
;
if
(
pCtx
->
periodMs
>
tsRedirectMaxPeriod
)
{
pCtx
->
periodMs
=
tsRedirectMaxPeriod
;
}
int64_t
leftTime
=
tsMaxRetryWaitTime
-
lastTime
;
pTask
->
delayExecMs
=
leftTime
<
pCtx
->
periodMs
?
leftTime
:
pCtx
->
periodMs
;
goto
_return
;
}
pTask
->
delayExecMs
=
0
;
_return:
SCH_TASK_DLOG
(
"task start %d/%d/%d redirect retry, delayExec:%d"
,
pCtx
->
roundTimes
,
pCtx
->
roundTotal
,
pCtx
->
totalTimes
,
pTask
->
delayExecMs
);
return
TSDB_CODE_SUCCESS
;
}
int32_t
schDoTaskRedirect
(
SSchJob
*
pJob
,
SSchTask
*
pTask
,
SDataBuf
*
pData
,
int32_t
rspCode
)
{
int32_t
code
=
0
;
...
...
@@ -349,14 +413,10 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf *pData, int32
pTask
->
retryTimes
=
0
;
}
if
(((
pTask
->
execId
+
1
)
>=
pTask
->
maxExecTimes
)
||
((
pTask
->
retryTimes
+
1
)
>
pTask
->
maxRetryTimes
))
{
SCH_TASK_DLOG
(
"task no more retry since reach max times %d:%d, execId %d"
,
pTask
->
maxRetryTimes
,
pTask
->
maxExecTimes
,
pTask
->
execId
);
schHandleJobFailure
(
pJob
,
rspCode
);
return
TSDB_CODE_SUCCESS
;
}
SCH_ERR_JRET
(
schChkUpdateRedirectCtx
(
pJob
,
pTask
,
pData
?
pData
->
pEpSet
:
NULL
));
pTask
->
waitRetry
=
true
;
schDropTaskOnExecNode
(
pJob
,
pTask
);
taosHashClear
(
pTask
->
execNodes
);
schRemoveTaskFromExecList
(
pJob
,
pTask
);
...
...
@@ -368,8 +428,17 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf *pData, int32
memset
(
&
pTask
->
succeedAddr
,
0
,
sizeof
(
pTask
->
succeedAddr
));
if
(
SCH_IS_DATA_BIND_TASK
(
pTask
))
{
if
(
pData
)
{
if
(
pData
&&
pData
->
pEpSet
)
{
SCH_ERR_JRET
(
schUpdateTaskCandidateAddr
(
pJob
,
pTask
,
pData
->
pEpSet
));
}
else
if
(
SYNC_UNKNOWN_LEADER_REDIRECT_ERROR
(
rspCode
))
{
SQueryNodeAddr
*
addr
=
taosArrayGet
(
pTask
->
candidateAddrs
,
pTask
->
candidateIdx
);
SCH_SWITCH_EPSET
(
addr
);
SCH_TASK_DLOG
(
"switch task target node %d epset to %d/%d"
,
addr
->
nodeId
,
addr
->
epSet
.
inUse
,
addr
->
epSet
.
numOfEps
);
}
else
{
SQueryNodeAddr
*
addr
=
taosArrayGet
(
pTask
->
candidateAddrs
,
pTask
->
candidateIdx
);
SEp
*
pEp
=
&
addr
->
epSet
.
eps
[
addr
->
epSet
.
inUse
];
SCH_TASK_DLOG
(
"task retry node %d current ep, idx:%d/%d,%s:%d"
,
addr
->
nodeId
,
addr
->
epSet
.
inUse
,
addr
->
epSet
.
numOfEps
,
pEp
->
fqdn
,
pEp
->
port
);
}
if
(
SCH_TASK_NEED_FLOW_CTRL
(
pJob
,
pTask
))
{
...
...
@@ -380,7 +449,7 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf *pData, int32
SCH_SET_TASK_STATUS
(
pTask
,
JOB_TASK_STATUS_INIT
);
SCH_ERR_JRET
(
schLaunchTask
(
pJob
,
pTask
));
SCH_ERR_JRET
(
sch
Delay
LaunchTask
(
pJob
,
pTask
));
return
TSDB_CODE_SUCCESS
;
}
...
...
@@ -428,28 +497,24 @@ int32_t schHandleRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf *pData, int32
schUpdateJobStatus
(
pJob
,
JOB_TASK_STATUS_EXEC
);
}
if
(
S
CH_IS_DATA_BIND_TASK
(
pTask
))
{
if
(
S
YNC_OTHER_LEADER_REDIRECT_ERROR
(
rspCode
))
{
if
(
NULL
==
pData
->
pEpSet
)
{
SCH_TASK_ELOG
(
"
no epset updated while got error
%s"
,
tstrerror
(
rspCode
));
code
=
rspCode
;
SCH_TASK_ELOG
(
"
epset updating excepted, error:
%s"
,
tstrerror
(
rspCode
));
code
=
TSDB_CODE_INVALID_MSG
;
goto
_return
;
}
}
code
=
schDoTaskRedirect
(
pJob
,
pTask
,
pData
,
rspCode
);
taosMemoryFree
(
pData
->
pData
);
taosMemoryFree
(
pData
->
pEpSet
);
pData
->
pData
=
NULL
;
pData
->
pEpSet
=
NULL
;
taosMemoryFreeClear
(
pData
->
pData
);
taosMemoryFreeClear
(
pData
->
pEpSet
);
SCH_RET
(
code
);
_return:
taosMemoryFree
(
pData
->
pData
);
taosMemoryFree
(
pData
->
pEpSet
);
pData
->
pData
=
NULL
;
pData
->
pEpSet
=
NULL
;
taosMemoryFreeClear
(
pData
->
pData
);
taosMemoryFreeClear
(
pData
->
pEpSet
);
SCH_RET
(
schProcessOnTaskFailure
(
pJob
,
pTask
,
code
));
}
...
...
@@ -715,10 +780,13 @@ int32_t schUpdateTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask, SEpSet *pEpSe
SQueryNodeAddr
*
pAddr
=
taosArrayGet
(
pTask
->
candidateAddrs
,
0
);
SEp
*
pOld
=
&
pAddr
->
epSet
.
eps
[
pAddr
->
epSet
.
inUse
];
SEp
*
pNew
=
&
pEpSet
->
eps
[
pEpSet
->
inUse
];
char
*
origEpset
=
schDumpEpSet
(
&
pAddr
->
epSet
);
char
*
newEpset
=
schDumpEpSet
(
pEpSet
);
SCH_TASK_DLOG
(
"update task target node %d epset from %s to %s"
,
pAddr
->
nodeId
,
origEpset
,
newEpset
);
SCH_TASK_DLOG
(
"update task ep from %s:%d to %s:%d"
,
pOld
->
fqdn
,
pOld
->
port
,
pNew
->
fqdn
,
pNew
->
port
);
taosMemoryFree
(
origEpset
);
taosMemoryFree
(
newEpset
);
memcpy
(
&
pAddr
->
epSet
,
pEpSet
,
sizeof
(
pAddr
->
epSet
));
...
...
@@ -1078,6 +1146,56 @@ _return:
SCH_RET
(
schProcessOnTaskFailure
(
pJob
,
pTask
,
code
));
}
void
schHandleTimerEvent
(
void
*
param
,
void
*
tmrId
)
{
SSchTimerParam
*
pTimerParam
=
(
SSchTimerParam
*
)
param
;
SSchTask
*
pTask
=
NULL
;
SSchJob
*
pJob
=
NULL
;
int32_t
code
=
0
;
int64_t
rId
=
pTimerParam
->
rId
;
uint64_t
queryId
=
pTimerParam
->
queryId
;
uint64_t
taskId
=
pTimerParam
->
taskId
;
taosMemoryFree
(
pTimerParam
);
if
(
schProcessOnCbBegin
(
&
pJob
,
&
pTask
,
queryId
,
rId
,
taskId
))
{
return
;
}
code
=
schLaunchTask
(
pJob
,
pTask
);
schProcessOnCbEnd
(
pJob
,
pTask
,
code
);
}
int32_t
schDelayLaunchTask
(
SSchJob
*
pJob
,
SSchTask
*
pTask
)
{
if
(
pTask
->
delayExecMs
>
0
)
{
SSchTimerParam
*
param
=
taosMemoryMalloc
(
sizeof
(
SSchTimerParam
));
if
(
NULL
==
param
)
{
SCH_TASK_ELOG
(
"taosMemoryMalloc %d failed"
,
(
int
)
sizeof
(
SSchTimerParam
));
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
param
->
rId
=
pJob
->
refId
;
param
->
queryId
=
pJob
->
queryId
;
param
->
taskId
=
pTask
->
taskId
;
if
(
NULL
==
pTask
->
delayTimer
)
{
pTask
->
delayTimer
=
taosTmrStart
(
schHandleTimerEvent
,
pTask
->
delayExecMs
,
(
void
*
)
param
,
schMgmt
.
timer
);
if
(
NULL
==
pTask
->
delayTimer
)
{
SCH_TASK_ELOG
(
"start delay timer failed, handle:%p"
,
schMgmt
.
timer
);
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
return
TSDB_CODE_SUCCESS
;
}
taosTmrReset
(
schHandleTimerEvent
,
pTask
->
delayExecMs
,
(
void
*
)
param
,
schMgmt
.
timer
,
&
pTask
->
delayTimer
);
return
TSDB_CODE_SUCCESS
;
}
SCH_RET
(
schLaunchTask
(
pJob
,
pTask
));
}
int32_t
schLaunchLevelTasks
(
SSchJob
*
pJob
,
SSchLevel
*
level
)
{
SCH_ERR_RET
(
schChkJobNeedFlowCtrl
(
pJob
,
level
));
...
...
@@ -1099,7 +1217,12 @@ void schDropTaskInHashList(SSchJob *pJob, SHashObj *list) {
while
(
pIter
)
{
SSchTask
*
pTask
=
*
(
SSchTask
**
)
pIter
;
SCH_LOCK_TASK
(
pTask
);
if
(
pTask
->
delayTimer
)
{
taosTmrStopA
(
&
pTask
->
delayTimer
);
}
schDropTaskOnExecNode
(
pJob
,
pTask
);
SCH_UNLOCK_TASK
(
pTask
);
pIter
=
taosHashIterate
(
list
,
pIter
);
}
...
...
source/libs/scheduler/src/schUtil.c
浏览文件 @
189cdc4f
...
...
@@ -36,6 +36,27 @@ FORCE_INLINE int32_t schReleaseJob(int64_t refId) {
return
taosReleaseRef
(
schMgmt
.
jobRef
,
refId
);
}
char
*
schDumpEpSet
(
SEpSet
*
pEpSet
)
{
if
(
NULL
==
pEpSet
)
{
return
NULL
;
}
int32_t
maxSize
=
1024
;
char
*
str
=
taosMemoryMalloc
(
maxSize
);
if
(
NULL
==
str
)
{
return
NULL
;
}
int32_t
n
=
0
;
n
+=
snprintf
(
str
+
n
,
maxSize
-
n
,
"numOfEps:%d, inUse:%d eps:"
,
pEpSet
->
numOfEps
,
pEpSet
->
inUse
);
for
(
int32_t
i
=
0
;
i
<
pEpSet
->
numOfEps
;
++
i
)
{
SEp
*
pEp
=
&
pEpSet
->
eps
[
i
];
n
+=
snprintf
(
str
+
n
,
maxSize
-
n
,
"[%s:%d]"
,
pEp
->
fqdn
,
pEp
->
port
);
}
return
str
;
}
char
*
schGetOpStr
(
SCH_OP_TYPE
type
)
{
switch
(
type
)
{
case
SCH_OP_NULL
:
...
...
source/libs/scheduler/src/scheduler.c
浏览文件 @
189cdc4f
...
...
@@ -48,6 +48,12 @@ int32_t schedulerInit() {
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
schMgmt
.
timer
=
taosTmrInit
(
0
,
0
,
0
,
"scheduler"
);
if
(
NULL
==
schMgmt
.
timer
)
{
qError
(
"init timer failed, error:%s"
,
tstrerror
(
terrno
));
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
if
(
taosGetSystemUUID
((
char
*
)
&
schMgmt
.
sId
,
sizeof
(
schMgmt
.
sId
)))
{
qError
(
"generate schdulerId failed, errno:%d"
,
errno
);
SCH_ERR_RET
(
TSDB_CODE_QRY_SYS_ERROR
);
...
...
source/libs/transport/inc/transComm.h
浏览文件 @
189cdc4f
...
...
@@ -137,15 +137,23 @@ typedef struct {
tmsg_t
msgType
;
// message type
int8_t
connType
;
// connection type cli/srv
int8_t
retryCnt
;
int8_t
retryLimit
;
STransCtx
appCtx
;
//
STransMsg
*
pRsp
;
// for synchronous API
tsem_t
*
pSem
;
// for synchronous API
SCvtAddr
cvtAddr
;
bool
setMaxRetry
;
int32_t
retryMinInterval
;
int32_t
retryMaxInterval
;
int32_t
retryStepFactor
;
int64_t
retryMaxTimeout
;
int64_t
retryInitTimestamp
;
int64_t
retryNextInterval
;
bool
retryInit
;
int32_t
retryStep
;
int8_t
epsetRetryCnt
;
int
hThrdIdx
;
}
STransConnCtx
;
...
...
source/libs/transport/inc/transportInt.h
浏览文件 @
189cdc4f
...
...
@@ -52,6 +52,11 @@ typedef struct {
int32_t
retryLimit
;
// retry limit
int32_t
retryInterval
;
// retry interval ms
int32_t
retryMinInterval
;
// retry init interval
int32_t
retryStepFactor
;
// retry interval factor
int32_t
retryMaxInterval
;
// retry max interval
int32_t
retryMaxTimouet
;
void
(
*
cfp
)(
void
*
parent
,
SRpcMsg
*
,
SEpSet
*
);
bool
(
*
retry
)(
int32_t
code
,
tmsg_t
msgType
);
bool
(
*
startTimer
)(
int32_t
code
,
tmsg_t
msgType
);
...
...
source/libs/transport/src/trans.c
浏览文件 @
189cdc4f
...
...
@@ -51,6 +51,11 @@ void* rpcOpen(const SRpcInit* pInit) {
pRpc
->
retryLimit
=
pInit
->
retryLimit
;
pRpc
->
retryInterval
=
pInit
->
retryInterval
;
pRpc
->
retryMinInterval
=
pInit
->
retryMinInterval
;
// retry init interval
pRpc
->
retryStepFactor
=
pInit
->
retryStepFactor
;
pRpc
->
retryMaxInterval
=
pInit
->
retryMaxInterval
;
pRpc
->
retryMaxTimouet
=
pInit
->
retryMaxTimouet
;
// register callback handle
pRpc
->
cfp
=
pInit
->
cfp
;
pRpc
->
retry
=
pInit
->
rfp
;
...
...
source/libs/transport/src/transCli.c
浏览文件 @
189cdc4f
...
...
@@ -234,7 +234,7 @@ static void cliWalkCb(uv_handle_t* handle, void* arg);
#define REQUEST_PERSIS_HANDLE(msg) ((msg)->info.persistHandle == 1)
#define REQUEST_RELEASE_HANDLE(cmsg) ((cmsg)->type == Release)
#define EPSET_IS_VALID(epSet) ((epSet) != NULL && (epSet)->numOfEps
!
= 0)
#define EPSET_IS_VALID(epSet) ((epSet) != NULL && (epSet)->numOfEps
>= 0 && (epSet)->inUse >
= 0)
#define EPSET_GET_SIZE(epSet) (epSet)->numOfEps
#define EPSET_GET_INUSE_IP(epSet) ((epSet)->eps[(epSet)->inUse].fqdn)
#define EPSET_GET_INUSE_PORT(epSet) ((epSet)->eps[(epSet)->inUse].port)
...
...
@@ -367,7 +367,7 @@ void cliHandleResp(SCliConn* conn) {
STraceId
*
trace
=
&
transMsg
.
info
.
traceId
;
tGDebug
(
"%s conn %p %s received from %s, local info:%s, len:%d, code str:%s"
,
CONN_GET_INST_LABEL
(
conn
),
conn
,
TMSG_INFO
(
pHead
->
msgType
),
conn
->
dst
,
conn
->
src
,
msgLen
,
tstrerror
(
transMsg
.
code
));
TMSG_INFO
(
pHead
->
msgType
),
conn
->
dst
,
conn
->
src
,
pHead
->
msgLen
,
tstrerror
(
transMsg
.
code
));
if
(
pCtx
==
NULL
&&
CONN_NO_PERSIST_BY_APP
(
conn
))
{
tDebug
(
"%s except, conn %p read while cli ignore it"
,
CONN_GET_INST_LABEL
(
conn
),
conn
);
...
...
@@ -971,7 +971,7 @@ FORCE_INLINE void cliMayCvtFqdnToIp(SEpSet* pEpSet, SCvtAddr* pCvtAddr) {
FORCE_INLINE
bool
cliIsEpsetUpdated
(
int32_t
code
,
STransConnCtx
*
pCtx
)
{
if
(
code
!=
0
)
return
false
;
if
(
pCtx
->
retryCnt
==
0
)
return
false
;
//
if (pCtx->retryCnt == 0) return false;
if
(
transEpSetIsEqual
(
&
pCtx
->
epSet
,
&
pCtx
->
origEpSet
))
return
false
;
return
true
;
}
...
...
@@ -1008,9 +1008,14 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) {
STransConnCtx
*
pCtx
=
pMsg
->
ctx
;
cliMayCvtFqdnToIp
(
&
pCtx
->
epSet
,
&
pThrd
->
cvtAddr
);
char
tbuf
[
256
]
=
{
0
};
EPSET_DEBUG_STR
(
&
pCtx
->
epSet
,
tbuf
);
tDebug
(
"current epset %s"
,
tbuf
);
if
(
!
EPSET_IS_VALID
(
&
pCtx
->
epSet
))
{
destroyCmsg
(
pMsg
);
tError
(
"invalid epset"
);
destroyCmsg
(
pMsg
);
return
;
}
...
...
@@ -1047,12 +1052,14 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) {
addr
.
sin_family
=
AF_INET
;
addr
.
sin_addr
.
s_addr
=
cliGetIpFromFqdnCache
(
pThrd
->
fqdn2ipCache
,
conn
->
ip
);
addr
.
sin_port
=
(
uint16_t
)
htons
((
uint16_t
)
conn
->
port
);
tTrace
(
"%s conn %p try to connect to %s:%d"
,
pTransInst
->
label
,
conn
,
conn
->
ip
,
conn
->
port
);
STraceId
*
trace
=
&
(
pMsg
->
msg
.
info
.
traceId
);
tGTrace
(
"%s conn %p try to connect to %s:%d"
,
pTransInst
->
label
,
conn
,
conn
->
ip
,
conn
->
port
);
int
ret
=
uv_tcp_connect
(
&
conn
->
connReq
,
(
uv_tcp_t
*
)(
conn
->
stream
),
(
const
struct
sockaddr
*
)
&
addr
,
cliConnCb
);
if
(
ret
!=
0
)
{
tTrace
(
"%s conn %p failed to connect to %s:%d, reason:%s"
,
pTransInst
->
label
,
conn
,
conn
->
ip
,
conn
->
port
,
uv_err_name
(
ret
));
t
G
Trace
(
"%s conn %p failed to connect to %s:%d, reason:%s"
,
pTransInst
->
label
,
conn
,
conn
->
ip
,
conn
->
port
,
uv_err_name
(
ret
));
uv_timer_stop
(
conn
->
timer
);
conn
->
timer
->
data
=
NULL
;
...
...
@@ -1378,13 +1385,14 @@ static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) {
STraceId
*
trace
=
&
pMsg
->
msg
.
info
.
traceId
;
char
tbuf
[
256
]
=
{
0
};
EPSET_DEBUG_STR
(
&
pCtx
->
epSet
,
tbuf
);
tGDebug
(
"%s retry on next node,
use %s, retryCnt:%d, limit:%d
"
,
transLabel
(
pThrd
->
pTransInst
),
tbuf
,
pCtx
->
retry
Cnt
+
1
,
pCtx
->
retryLimit
);
tGDebug
(
"%s retry on next node,
use:%s, step: %d,timeout:%"
PRId64
"
"
,
transLabel
(
pThrd
->
pTransInst
),
tbuf
,
pCtx
->
retry
Step
,
pCtx
->
retryNextInterval
);
STaskArg
*
arg
=
taosMemoryMalloc
(
sizeof
(
STaskArg
));
arg
->
param1
=
pMsg
;
arg
->
param2
=
pThrd
;
transDQSched
(
pThrd
->
delayQueue
,
doDelayTask
,
arg
,
pTransInst
->
retryInterval
);
transDQSched
(
pThrd
->
delayQueue
,
doDelayTask
,
arg
,
pCtx
->
retryNextInterval
);
}
FORCE_INLINE
void
cliCompareAndSwap
(
int8_t
*
val
,
int8_t
exp
,
int8_t
newVal
)
{
...
...
@@ -1418,6 +1426,122 @@ FORCE_INLINE bool cliTryExtractEpSet(STransMsg* pResp, SEpSet* dst) {
*
dst
=
epset
;
return
true
;
}
bool
cliResetEpset
(
STransConnCtx
*
pCtx
,
STransMsg
*
pResp
,
bool
hasEpSet
)
{
bool
noDelay
=
true
;
if
(
hasEpSet
==
false
)
{
// assert(pResp->contLen == 0);
if
(
pResp
->
contLen
==
0
)
{
if
(
pCtx
->
epsetRetryCnt
>=
pCtx
->
epSet
.
numOfEps
)
{
noDelay
=
false
;
}
else
{
EPSET_FORWARD_INUSE
(
&
pCtx
->
epSet
);
}
}
else
{
if
(
pCtx
->
epsetRetryCnt
>=
pCtx
->
epSet
.
numOfEps
)
{
noDelay
=
false
;
}
else
{
EPSET_FORWARD_INUSE
(
&
pCtx
->
epSet
);
}
}
}
else
{
SEpSet
epSet
;
int32_t
valid
=
tDeserializeSEpSet
(
pResp
->
pCont
,
pResp
->
contLen
,
&
epSet
);
if
(
valid
<
0
)
{
tDebug
(
"get invalid epset, epset equal, continue"
);
if
(
pCtx
->
epsetRetryCnt
>=
pCtx
->
epSet
.
numOfEps
)
{
noDelay
=
false
;
}
else
{
EPSET_FORWARD_INUSE
(
&
pCtx
->
epSet
);
}
}
else
{
if
(
!
transEpSetIsEqual
(
&
pCtx
->
epSet
,
&
epSet
))
{
tDebug
(
"epset not equal, retry new epset"
);
pCtx
->
epSet
=
epSet
;
noDelay
=
false
;
}
else
{
if
(
pCtx
->
epsetRetryCnt
>=
pCtx
->
epSet
.
numOfEps
)
{
noDelay
=
false
;
}
else
{
tDebug
(
"epset equal, continue"
);
EPSET_FORWARD_INUSE
(
&
pCtx
->
epSet
);
}
}
}
}
return
noDelay
;
}
bool
cliGenRetryRule
(
SCliConn
*
pConn
,
STransMsg
*
pResp
,
SCliMsg
*
pMsg
)
{
SCliThrd
*
pThrd
=
pConn
->
hostThrd
;
STrans
*
pTransInst
=
pThrd
->
pTransInst
;
STransConnCtx
*
pCtx
=
pMsg
->
ctx
;
int32_t
code
=
pResp
->
code
;
bool
retry
=
pTransInst
->
retry
!=
NULL
?
pTransInst
->
retry
(
code
,
pResp
->
msgType
-
1
)
:
false
;
if
(
retry
==
false
)
{
return
false
;
}
if
(
!
pCtx
->
retryInit
)
{
pCtx
->
retryMinInterval
=
pTransInst
->
retryMinInterval
;
pCtx
->
retryMaxInterval
=
pTransInst
->
retryMaxInterval
;
pCtx
->
retryStepFactor
=
pTransInst
->
retryStepFactor
;
pCtx
->
retryMaxTimeout
=
pTransInst
->
retryMaxTimouet
;
pCtx
->
retryInitTimestamp
=
taosGetTimestampMs
();
pCtx
->
retryNextInterval
=
pCtx
->
retryMinInterval
;
pCtx
->
retryStep
=
0
;
pCtx
->
retryInit
=
true
;
}
if
(
-
1
!=
pCtx
->
retryMaxTimeout
&&
taosGetTimestampMs
()
-
pCtx
->
retryInitTimestamp
>=
pCtx
->
retryMaxTimeout
)
{
return
false
;
}
bool
noDelay
=
false
;
if
(
code
==
TSDB_CODE_RPC_BROKEN_LINK
||
code
==
TSDB_CODE_RPC_NETWORK_UNAVAIL
)
{
tDebug
(
"code str %s, contlen:%d 0"
,
tstrerror
(
code
),
pResp
->
contLen
);
noDelay
=
cliResetEpset
(
pCtx
,
pResp
,
false
);
transFreeMsg
(
pResp
->
pCont
);
transUnrefCliHandle
(
pConn
);
}
else
if
(
code
==
TSDB_CODE_SYN_NOT_LEADER
||
code
==
TSDB_CODE_SYN_INTERNAL_ERROR
||
code
==
TSDB_CODE_SYN_PROPOSE_NOT_READY
||
code
==
TSDB_CODE_RPC_REDIRECT
)
{
tDebug
(
"code str %s, contlen:%d 1"
,
tstrerror
(
code
),
pResp
->
contLen
);
noDelay
=
cliResetEpset
(
pCtx
,
pResp
,
true
);
transFreeMsg
(
pResp
->
pCont
);
addConnToPool
(
pThrd
->
pool
,
pConn
);
}
else
if
(
code
==
TSDB_CODE_SYN_RESTORING
)
{
tDebug
(
"code str %s, contlen:%d 0"
,
tstrerror
(
code
),
pResp
->
contLen
);
noDelay
=
cliResetEpset
(
pCtx
,
pResp
,
false
);
addConnToPool
(
pThrd
->
pool
,
pConn
);
transFreeMsg
(
pResp
->
pCont
);
}
else
{
tDebug
(
"code str %s, contlen:%d 0"
,
tstrerror
(
code
),
pResp
->
contLen
);
noDelay
=
cliResetEpset
(
pCtx
,
pResp
,
false
);
addConnToPool
(
pThrd
->
pool
,
pConn
);
transFreeMsg
(
pResp
->
pCont
);
}
if
(
noDelay
==
false
)
{
pCtx
->
epsetRetryCnt
=
1
;
pCtx
->
retryStep
++
;
int64_t
factor
=
pow
(
pCtx
->
retryStepFactor
,
pCtx
->
retryStep
-
1
);
pCtx
->
retryNextInterval
=
factor
*
pCtx
->
retryMinInterval
;
if
(
pCtx
->
retryNextInterval
>=
pCtx
->
retryMaxInterval
)
{
pCtx
->
retryNextInterval
=
pCtx
->
retryMaxInterval
;
}
if
(
-
1
!=
pCtx
->
retryMaxTimeout
&&
taosGetTimestampMs
()
-
pCtx
->
retryInitTimestamp
>=
pCtx
->
retryMaxTimeout
)
{
return
false
;
}
}
else
{
pCtx
->
retryNextInterval
=
0
;
pCtx
->
epsetRetryCnt
++
;
}
pMsg
->
sent
=
0
;
cliSchedMsgToNextNode
(
pMsg
,
pThrd
);
return
true
;
}
int
cliAppCb
(
SCliConn
*
pConn
,
STransMsg
*
pResp
,
SCliMsg
*
pMsg
)
{
SCliThrd
*
pThrd
=
pConn
->
hostThrd
;
STrans
*
pTransInst
=
pThrd
->
pTransInst
;
...
...
@@ -1431,40 +1555,10 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) {
STransConnCtx
*
pCtx
=
pMsg
->
ctx
;
int32_t
code
=
pResp
->
code
;
bool
retry
=
(
pTransInst
->
retry
!=
NULL
&&
pTransInst
->
retry
(
code
,
pResp
->
msgType
-
1
))
?
true
:
false
;
if
(
retry
)
{
pMsg
->
sent
=
0
;
pCtx
->
retryCnt
+=
1
;
if
(
code
==
TSDB_CODE_RPC_NETWORK_UNAVAIL
||
code
==
TSDB_CODE_RPC_BROKEN_LINK
)
{
cliCompareAndSwap
(
&
pCtx
->
retryLimit
,
pTransInst
->
retryLimit
,
EPSET_GET_SIZE
(
&
pCtx
->
epSet
)
*
3
);
if
(
pCtx
->
retryCnt
<
pCtx
->
retryLimit
)
{
transUnrefCliHandle
(
pConn
);
EPSET_FORWARD_INUSE
(
&
pCtx
->
epSet
);
transFreeMsg
(
pResp
->
pCont
);
cliSchedMsgToNextNode
(
pMsg
,
pThrd
);
return
-
1
;
}
}
else
{
cliCompareAndSwap
(
&
pCtx
->
retryLimit
,
pTransInst
->
retryLimit
,
pTransInst
->
retryLimit
);
if
(
pCtx
->
retryCnt
<
pCtx
->
retryLimit
)
{
if
(
pResp
->
contLen
==
0
)
{
EPSET_FORWARD_INUSE
(
&
pCtx
->
epSet
);
}
else
{
if
(
tDeserializeSEpSet
(
pResp
->
pCont
,
pResp
->
contLen
,
&
pCtx
->
epSet
)
<
0
)
{
tError
(
"%s conn %p failed to deserialize epset"
,
CONN_GET_INST_LABEL
(
pConn
),
pConn
);
}
}
addConnToPool
(
pThrd
->
pool
,
pConn
);
transFreeMsg
(
pResp
->
pCont
);
cliSchedMsgToNextNode
(
pMsg
,
pThrd
);
return
-
1
;
}
else
{
// change error code for taos client driver if retryCnt exceeds limit
if
(
0
==
strncmp
(
pTransInst
->
label
,
"TSC"
,
strlen
(
"TSC"
)))
pResp
->
code
=
TSDB_CODE_APP_NOT_READY
;
}
}
bool
retry
=
cliGenRetryRule
(
pConn
,
pResp
,
pMsg
);
if
(
retry
==
true
)
{
return
-
1
;
}
STraceId
*
trace
=
&
pResp
->
info
.
traceId
;
bool
hasEpSet
=
cliTryExtractEpSet
(
pResp
,
&
pCtx
->
epSet
);
...
...
source/libs/transport/src/transSvr.c
浏览文件 @
189cdc4f
...
...
@@ -1229,7 +1229,7 @@ int transReleaseSrvHandle(void* handle) {
m
->
msg
=
tmsg
;
m
->
type
=
Release
;
t
Trace
(
"%s conn %p start to release"
,
transLabel
(
pThrd
->
pTransInst
),
exh
->
handle
);
t
Debug
(
"%s conn %p start to release"
,
transLabel
(
pThrd
->
pTransInst
),
exh
->
handle
);
if
(
0
!=
transAsyncSend
(
pThrd
->
asyncPool
,
&
m
->
q
))
{
destroySmsg
(
m
);
}
...
...
@@ -1237,11 +1237,11 @@ int transReleaseSrvHandle(void* handle) {
transReleaseExHandle
(
transGetRefMgt
(),
refId
);
return
0
;
_return1:
t
Trace
(
"handle %p failed to send to release handle"
,
exh
);
t
Debug
(
"handle %p failed to send to release handle"
,
exh
);
transReleaseExHandle
(
transGetRefMgt
(),
refId
);
return
-
1
;
_return2:
t
Trace
(
"handle %p failed to send to release handle"
,
exh
);
t
Debug
(
"handle %p failed to send to release handle"
,
exh
);
return
-
1
;
}
int
transSendResponse
(
const
STransMsg
*
msg
)
{
...
...
@@ -1266,7 +1266,7 @@ int transSendResponse(const STransMsg* msg) {
m
->
type
=
Normal
;
STraceId
*
trace
=
(
STraceId
*
)
&
msg
->
info
.
traceId
;
tG
Trace
(
"conn %p start to send resp (1/2)"
,
exh
->
handle
);
tG
Debug
(
"conn %p start to send resp (1/2)"
,
exh
->
handle
);
if
(
0
!=
transAsyncSend
(
pThrd
->
asyncPool
,
&
m
->
q
))
{
destroySmsg
(
m
);
}
...
...
@@ -1275,12 +1275,12 @@ int transSendResponse(const STransMsg* msg) {
return
0
;
_return1:
t
Trace
(
"handle %p failed to send resp"
,
exh
);
t
Debug
(
"handle %p failed to send resp"
,
exh
);
rpcFreeCont
(
msg
->
pCont
);
transReleaseExHandle
(
transGetRefMgt
(),
refId
);
return
-
1
;
_return2:
t
Trace
(
"handle %p failed to send resp"
,
exh
);
t
Debug
(
"handle %p failed to send resp"
,
exh
);
rpcFreeCont
(
msg
->
pCont
);
return
-
1
;
}
...
...
@@ -1302,7 +1302,7 @@ int transRegisterMsg(const STransMsg* msg) {
m
->
type
=
Register
;
STrans
*
pTransInst
=
pThrd
->
pTransInst
;
t
Trace
(
"%s conn %p start to register brokenlink callback"
,
transLabel
(
pTransInst
),
exh
->
handle
);
t
Debug
(
"%s conn %p start to register brokenlink callback"
,
transLabel
(
pTransInst
),
exh
->
handle
);
if
(
0
!=
transAsyncSend
(
pThrd
->
asyncPool
,
&
m
->
q
))
{
destroySmsg
(
m
);
}
...
...
@@ -1311,12 +1311,12 @@ int transRegisterMsg(const STransMsg* msg) {
return
0
;
_return1:
t
Trace
(
"handle %p failed to register brokenlink"
,
exh
);
t
Debug
(
"handle %p failed to register brokenlink"
,
exh
);
rpcFreeCont
(
msg
->
pCont
);
transReleaseExHandle
(
transGetRefMgt
(),
refId
);
return
-
1
;
_return2:
t
Trace
(
"handle %p failed to register brokenlink"
,
exh
);
t
Debug
(
"handle %p failed to register brokenlink"
,
exh
);
rpcFreeCont
(
msg
->
pCont
);
return
-
1
;
}
...
...
source/util/src/terror.c
浏览文件 @
189cdc4f
...
...
@@ -95,6 +95,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MSG_DECODE_ERROR, "Msg decode error")
TAOS_DEFINE_ERROR
(
TSDB_CODE_NO_AVAIL_DISK
,
"No available disk"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_NOT_FOUND
,
"Not found"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_NO_DISKSPACE
,
"Out of disk space"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_TIMEOUT_ERROR
,
"Operation timeout"
)
//client
TAOS_DEFINE_ERROR
(
TSDB_CODE_TSC_INVALID_OPERATION
,
"Invalid operation"
)
...
...
@@ -407,6 +408,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_RECONFIG_NOT_READY, "Sync not ready for re
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_PROPOSE_NOT_READY
,
"Sync not ready for propose"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_STANDBY_NOT_READY
,
"Sync not ready for standby"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_BATCH_ERROR
,
"Sync batch error"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_RESTORING
,
"Sync is restoring"
)
TAOS_DEFINE_ERROR
(
TSDB_CODE_SYN_INTERNAL_ERROR
,
"Sync internal error"
)
//tq
...
...
tests/parallel_test/cases.task
浏览文件 @
189cdc4f
...
...
@@ -113,7 +113,7 @@
,,y,script,./test.sh -f tsim/parser/first_last.sim
,,y,script,./test.sh -f tsim/parser/fill_stb.sim
,,y,script,./test.sh -f tsim/parser/interp.sim
,,y,script,./test.sh -f tsim/parser/limit2.sim
#
,,y,script,./test.sh -f tsim/parser/limit2.sim
,,y,script,./test.sh -f tsim/parser/fourArithmetic-basic.sim
,,y,script,./test.sh -f tsim/parser/function.sim
,,y,script,./test.sh -f tsim/parser/groupby-basic.sim
...
...
tests/system-test/0-others/sysinfo.py
浏览文件 @
189cdc4f
...
...
@@ -48,7 +48,7 @@ class TDTestCase:
tdSql
.
checkData
(
0
,
0
,
1
)
#!for bug
tdDnodes
.
stoptaosd
(
1
)
sleep
(
self
.
delaytime
)
sleep
(
self
.
delaytime
*
5
)
if
platform
.
system
().
lower
()
==
'windows'
:
sleep
(
10
)
tdSql
.
error
(
'select server_status()'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录