Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
a6d540f2
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a6d540f2
编写于
12月 06, 2022
作者:
D
dapan1121
提交者:
GitHub
12月 06, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #18725 from taosdata/enh/failFast
enh: add fail-fast
上级
5b829412
0254e90c
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
81 addition
and
3 deletion
+81
-3
include/libs/transport/trpc.h
include/libs/transport/trpc.h
+6
-0
source/dnode/mgmt/node_mgmt/src/dmTransport.c
source/dnode/mgmt/node_mgmt/src/dmTransport.c
+9
-0
source/libs/transport/inc/transportInt.h
source/libs/transport/inc/transportInt.h
+4
-0
source/libs/transport/src/trans.c
source/libs/transport/src/trans.c
+4
-0
source/libs/transport/src/transCli.c
source/libs/transport/src/transCli.c
+58
-3
未找到文件。
include/libs/transport/trpc.h
浏览文件 @
a6d540f2
...
@@ -72,6 +72,7 @@ typedef struct SRpcMsg {
...
@@ -72,6 +72,7 @@ typedef struct SRpcMsg {
typedef
void
(
*
RpcCfp
)(
void
*
parent
,
SRpcMsg
*
,
SEpSet
*
epset
);
typedef
void
(
*
RpcCfp
)(
void
*
parent
,
SRpcMsg
*
,
SEpSet
*
epset
);
typedef
bool
(
*
RpcRfp
)(
int32_t
code
,
tmsg_t
msgType
);
typedef
bool
(
*
RpcRfp
)(
int32_t
code
,
tmsg_t
msgType
);
typedef
bool
(
*
RpcTfp
)(
int32_t
code
,
tmsg_t
msgType
);
typedef
bool
(
*
RpcTfp
)(
int32_t
code
,
tmsg_t
msgType
);
typedef
bool
(
*
RpcFFfp
)(
tmsg_t
msgType
);
typedef
void
(
*
RpcDfp
)(
void
*
ahandle
);
typedef
void
(
*
RpcDfp
)(
void
*
ahandle
);
typedef
struct
SRpcInit
{
typedef
struct
SRpcInit
{
...
@@ -90,6 +91,9 @@ typedef struct SRpcInit {
...
@@ -90,6 +91,9 @@ typedef struct SRpcInit {
int32_t
retryMaxInterval
;
// retry max interval
int32_t
retryMaxInterval
;
// retry max interval
int64_t
retryMaxTimouet
;
int64_t
retryMaxTimouet
;
int32_t
failFastThreshold
;
int32_t
failFastInterval
;
int32_t
compressSize
;
// -1: no compress, 0 : all data compressed, size: compress data if larger than size
int32_t
compressSize
;
// -1: no compress, 0 : all data compressed, size: compress data if larger than size
int8_t
encryption
;
// encrypt or not
int8_t
encryption
;
// encrypt or not
...
@@ -107,6 +111,8 @@ typedef struct SRpcInit {
...
@@ -107,6 +111,8 @@ typedef struct SRpcInit {
// destroy client ahandle;
// destroy client ahandle;
RpcDfp
dfp
;
RpcDfp
dfp
;
// fail fast fp
RpcFFfp
ffp
;
void
*
parent
;
void
*
parent
;
}
SRpcInit
;
}
SRpcInit
;
...
...
source/dnode/mgmt/node_mgmt/src/dmTransport.c
浏览文件 @
a6d540f2
...
@@ -48,6 +48,11 @@ int32_t dmProcessNodeMsg(SMgmtWrapper *pWrapper, SRpcMsg *pMsg) {
...
@@ -48,6 +48,11 @@ int32_t dmProcessNodeMsg(SMgmtWrapper *pWrapper, SRpcMsg *pMsg) {
return
(
*
msgFp
)(
pWrapper
->
pMgmt
,
pMsg
);
return
(
*
msgFp
)(
pWrapper
->
pMgmt
,
pMsg
);
}
}
static
bool
dmFailFastFp
(
tmsg_t
msgType
)
{
// add more msg type later
return
msgType
==
TDMT_SYNC_HEARTBEAT
;
}
static
void
dmProcessRpcMsg
(
SDnode
*
pDnode
,
SRpcMsg
*
pRpc
,
SEpSet
*
pEpSet
)
{
static
void
dmProcessRpcMsg
(
SDnode
*
pDnode
,
SRpcMsg
*
pRpc
,
SEpSet
*
pEpSet
)
{
SDnodeTrans
*
pTrans
=
&
pDnode
->
trans
;
SDnodeTrans
*
pTrans
=
&
pDnode
->
trans
;
int32_t
code
=
-
1
;
int32_t
code
=
-
1
;
...
@@ -260,6 +265,10 @@ int32_t dmInitClient(SDnode *pDnode) {
...
@@ -260,6 +265,10 @@ int32_t dmInitClient(SDnode *pDnode) {
rpcInit
.
retryMaxInterval
=
tsRedirectMaxPeriod
;
rpcInit
.
retryMaxInterval
=
tsRedirectMaxPeriod
;
rpcInit
.
retryMaxTimouet
=
tsMaxRetryWaitTime
;
rpcInit
.
retryMaxTimouet
=
tsMaxRetryWaitTime
;
rpcInit
.
failFastInterval
=
1000
;
// interval threshold(ms)
rpcInit
.
failFastThreshold
=
3
;
// failed threshold
rpcInit
.
ffp
=
dmFailFastFp
;
pTrans
->
clientRpc
=
rpcOpen
(
&
rpcInit
);
pTrans
->
clientRpc
=
rpcOpen
(
&
rpcInit
);
if
(
pTrans
->
clientRpc
==
NULL
)
{
if
(
pTrans
->
clientRpc
==
NULL
)
{
dError
(
"failed to init dnode rpc client"
);
dError
(
"failed to init dnode rpc client"
);
...
...
source/libs/transport/inc/transportInt.h
浏览文件 @
a6d540f2
...
@@ -57,10 +57,14 @@ typedef struct {
...
@@ -57,10 +57,14 @@ typedef struct {
int32_t
retryMaxInterval
;
// retry max interval
int32_t
retryMaxInterval
;
// retry max interval
int32_t
retryMaxTimouet
;
int32_t
retryMaxTimouet
;
int32_t
failFastThreshold
;
int32_t
failFastInterval
;
void
(
*
cfp
)(
void
*
parent
,
SRpcMsg
*
,
SEpSet
*
);
void
(
*
cfp
)(
void
*
parent
,
SRpcMsg
*
,
SEpSet
*
);
bool
(
*
retry
)(
int32_t
code
,
tmsg_t
msgType
);
bool
(
*
retry
)(
int32_t
code
,
tmsg_t
msgType
);
bool
(
*
startTimer
)(
int32_t
code
,
tmsg_t
msgType
);
bool
(
*
startTimer
)(
int32_t
code
,
tmsg_t
msgType
);
void
(
*
destroyFp
)(
void
*
ahandle
);
void
(
*
destroyFp
)(
void
*
ahandle
);
bool
(
*
failFastFp
)(
tmsg_t
msgType
);
int
index
;
int
index
;
void
*
parent
;
void
*
parent
;
...
...
source/libs/transport/src/trans.c
浏览文件 @
a6d540f2
...
@@ -56,11 +56,15 @@ void* rpcOpen(const SRpcInit* pInit) {
...
@@ -56,11 +56,15 @@ void* rpcOpen(const SRpcInit* pInit) {
pRpc
->
retryMaxInterval
=
pInit
->
retryMaxInterval
;
pRpc
->
retryMaxInterval
=
pInit
->
retryMaxInterval
;
pRpc
->
retryMaxTimouet
=
pInit
->
retryMaxTimouet
;
pRpc
->
retryMaxTimouet
=
pInit
->
retryMaxTimouet
;
pRpc
->
failFastThreshold
=
pInit
->
failFastThreshold
;
pRpc
->
failFastInterval
=
pInit
->
failFastInterval
;
// register callback handle
// register callback handle
pRpc
->
cfp
=
pInit
->
cfp
;
pRpc
->
cfp
=
pInit
->
cfp
;
pRpc
->
retry
=
pInit
->
rfp
;
pRpc
->
retry
=
pInit
->
rfp
;
pRpc
->
startTimer
=
pInit
->
tfp
;
pRpc
->
startTimer
=
pInit
->
tfp
;
pRpc
->
destroyFp
=
pInit
->
dfp
;
pRpc
->
destroyFp
=
pInit
->
dfp
;
pRpc
->
failFastFp
=
pInit
->
ffp
;
pRpc
->
numOfThreads
=
pInit
->
numOfThreads
>
TSDB_MAX_RPC_THREADS
?
TSDB_MAX_RPC_THREADS
:
pInit
->
numOfThreads
;
pRpc
->
numOfThreads
=
pInit
->
numOfThreads
>
TSDB_MAX_RPC_THREADS
?
TSDB_MAX_RPC_THREADS
:
pInit
->
numOfThreads
;
if
(
pRpc
->
numOfThreads
<=
0
)
{
if
(
pRpc
->
numOfThreads
<=
0
)
{
...
...
source/libs/transport/src/transCli.c
浏览文件 @
a6d540f2
...
@@ -84,6 +84,8 @@ typedef struct SCliThrd {
...
@@ -84,6 +84,8 @@ typedef struct SCliThrd {
SHashObj
*
fqdn2ipCache
;
SHashObj
*
fqdn2ipCache
;
SCvtAddr
cvtAddr
;
SCvtAddr
cvtAddr
;
SHashObj
*
failFastCache
;
SCliMsg
*
stopMsg
;
SCliMsg
*
stopMsg
;
bool
quit
;
bool
quit
;
...
@@ -96,6 +98,13 @@ typedef struct SCliObj {
...
@@ -96,6 +98,13 @@ typedef struct SCliObj {
SCliThrd
**
pThreadObj
;
SCliThrd
**
pThreadObj
;
}
SCliObj
;
}
SCliObj
;
typedef
struct
{
int32_t
reinit
;
int64_t
timestamp
;
int32_t
count
;
int32_t
threshold
;
int64_t
interval
;
}
SFailFastItem
;
// conn pool
// conn pool
// add expire timeout and capacity limit
// add expire timeout and capacity limit
static
void
*
createConnPool
(
int
size
);
static
void
*
createConnPool
(
int
size
);
...
@@ -853,7 +862,7 @@ void cliSend(SCliConn* pConn) {
...
@@ -853,7 +862,7 @@ void cliSend(SCliConn* pConn) {
int
status
=
uv_write
(
req
,
(
uv_stream_t
*
)
pConn
->
stream
,
&
wb
,
1
,
cliSendCb
);
int
status
=
uv_write
(
req
,
(
uv_stream_t
*
)
pConn
->
stream
,
&
wb
,
1
,
cliSendCb
);
if
(
status
!=
0
)
{
if
(
status
!=
0
)
{
tGError
(
"%s conn %p failed to sen
t
msg:%s, errmsg:%s"
,
CONN_GET_INST_LABEL
(
pConn
),
pConn
,
TMSG_INFO
(
pMsg
->
msgType
),
tGError
(
"%s conn %p failed to sen
d
msg:%s, errmsg:%s"
,
CONN_GET_INST_LABEL
(
pConn
),
pConn
,
TMSG_INFO
(
pMsg
->
msgType
),
uv_err_name
(
status
));
uv_err_name
(
status
));
cliHandleExcept
(
pConn
);
cliHandleExcept
(
pConn
);
}
}
...
@@ -863,7 +872,6 @@ _RETURN:
...
@@ -863,7 +872,6 @@ _RETURN:
}
}
void
cliConnCb
(
uv_connect_t
*
req
,
int
status
)
{
void
cliConnCb
(
uv_connect_t
*
req
,
int
status
)
{
// impl later
SCliConn
*
pConn
=
req
->
data
;
SCliConn
*
pConn
=
req
->
data
;
SCliThrd
*
pThrd
=
pConn
->
hostThrd
;
SCliThrd
*
pThrd
=
pConn
->
hostThrd
;
...
@@ -875,7 +883,32 @@ void cliConnCb(uv_connect_t* req, int status) {
...
@@ -875,7 +883,32 @@ void cliConnCb(uv_connect_t* req, int status) {
}
}
if
(
status
!=
0
)
{
if
(
status
!=
0
)
{
tError
(
"%s conn %p failed to connect server:%s"
,
CONN_GET_INST_LABEL
(
pConn
),
pConn
,
uv_strerror
(
status
));
tError
(
"%s conn %p failed to connect to %s:%d, reason:%s"
,
CONN_GET_INST_LABEL
(
pConn
),
pConn
,
pConn
->
ip
,
pConn
->
port
,
uv_strerror
(
status
));
SCliMsg
*
pMsg
=
transQueueGet
(
&
pConn
->
cliMsgs
,
0
);
STrans
*
pTransInst
=
pThrd
->
pTransInst
;
if
(
pMsg
!=
NULL
&&
REQUEST_NO_RESP
(
&
pMsg
->
msg
)
&&
(
pTransInst
->
failFastFp
!=
NULL
&&
pTransInst
->
failFastFp
(
pMsg
->
msg
.
msgType
)))
{
char
*
ip
=
pConn
->
ip
;
uint32_t
port
=
pConn
->
port
;
char
key
[
TSDB_FQDN_LEN
+
64
]
=
{
0
};
CONN_CONSTRUCT_HASH_KEY
(
key
,
ip
,
port
);
SFailFastItem
*
item
=
taosHashGet
(
pThrd
->
failFastCache
,
key
,
strlen
(
key
));
int64_t
cTimestamp
=
taosGetTimestampMs
();
if
(
item
!=
NULL
)
{
int32_t
elapse
=
cTimestamp
-
item
->
timestamp
;
if
(
elapse
>=
0
&&
elapse
<=
pTransInst
->
failFastInterval
)
{
item
->
count
++
;
}
else
{
item
->
count
=
1
;
item
->
timestamp
=
cTimestamp
;
}
}
else
{
SFailFastItem
item
=
{.
count
=
1
,
.
timestamp
=
cTimestamp
};
taosHashPut
(
pThrd
->
failFastCache
,
key
,
strlen
(
key
),
&
item
,
sizeof
(
SFailFastItem
));
}
}
cliHandleExcept
(
pConn
);
cliHandleExcept
(
pConn
);
return
;
return
;
}
}
...
@@ -1027,6 +1060,25 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) {
...
@@ -1027,6 +1060,25 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) {
return
;
return
;
}
}
if
(
REQUEST_NO_RESP
(
&
pMsg
->
msg
)
&&
(
pTransInst
->
failFastFp
!=
NULL
&&
pTransInst
->
failFastFp
(
pMsg
->
msg
.
msgType
)))
{
char
*
ip
=
EPSET_GET_INUSE_IP
(
&
pCtx
->
epSet
);
uint32_t
port
=
EPSET_GET_INUSE_PORT
(
&
pCtx
->
epSet
);
char
key
[
TSDB_FQDN_LEN
+
64
]
=
{
0
};
CONN_CONSTRUCT_HASH_KEY
(
key
,
ip
,
port
);
SFailFastItem
*
item
=
taosHashGet
(
pThrd
->
failFastCache
,
key
,
strlen
(
key
));
if
(
item
!=
NULL
)
{
int32_t
elapse
=
(
int32_t
)(
taosGetTimestampMs
()
-
item
->
timestamp
);
if
(
item
->
count
>=
pTransInst
->
failFastThreshold
&&
(
elapse
>=
0
&&
elapse
<=
pTransInst
->
failFastInterval
))
{
STraceId
*
trace
=
&
(
pMsg
->
msg
.
info
.
traceId
);
tGTrace
(
"%s, msg %p cancel to send, reason: failed to connect %s:%d: count: %d, at %d"
,
pTransInst
->
label
,
pMsg
,
ip
,
port
,
item
->
count
,
elapse
);
destroyCmsg
(
pMsg
);
return
;
}
}
}
bool
ignore
=
false
;
bool
ignore
=
false
;
SCliConn
*
conn
=
cliGetConn
(
pMsg
,
pThrd
,
&
ignore
);
SCliConn
*
conn
=
cliGetConn
(
pMsg
,
pThrd
,
&
ignore
);
if
(
ignore
==
true
)
{
if
(
ignore
==
true
)
{
...
@@ -1299,6 +1351,8 @@ static SCliThrd* createThrdObj(void* trans) {
...
@@ -1299,6 +1351,8 @@ static SCliThrd* createThrdObj(void* trans) {
pThrd
->
destroyAhandleFp
=
pTransInst
->
destroyFp
;
pThrd
->
destroyAhandleFp
=
pTransInst
->
destroyFp
;
pThrd
->
fqdn2ipCache
=
taosHashInit
(
4
,
taosGetDefaultHashFunction
(
TSDB_DATA_TYPE_BINARY
),
true
,
HASH_NO_LOCK
);
pThrd
->
fqdn2ipCache
=
taosHashInit
(
4
,
taosGetDefaultHashFunction
(
TSDB_DATA_TYPE_BINARY
),
true
,
HASH_NO_LOCK
);
pThrd
->
failFastCache
=
taosHashInit
(
8
,
taosGetDefaultHashFunction
(
TSDB_DATA_TYPE_BINARY
),
true
,
HASH_NO_LOCK
);
pThrd
->
quit
=
false
;
pThrd
->
quit
=
false
;
return
pThrd
;
return
pThrd
;
}
}
...
@@ -1325,6 +1379,7 @@ static void destroyThrdObj(SCliThrd* pThrd) {
...
@@ -1325,6 +1379,7 @@ static void destroyThrdObj(SCliThrd* pThrd) {
taosMemoryFree
(
pThrd
->
prepare
);
taosMemoryFree
(
pThrd
->
prepare
);
taosMemoryFree
(
pThrd
->
loop
);
taosMemoryFree
(
pThrd
->
loop
);
taosHashCleanup
(
pThrd
->
fqdn2ipCache
);
taosHashCleanup
(
pThrd
->
fqdn2ipCache
);
taosHashCleanup
(
pThrd
->
failFastCache
);
taosMemoryFree
(
pThrd
);
taosMemoryFree
(
pThrd
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录