Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
3077a244
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3077a244
编写于
1月 08, 2022
作者:
S
Shengliang Guan
提交者:
GitHub
1月 08, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #9690 from taosdata/feature/qnode
feature/qnode
上级
a2260fd0
e5834b8e
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
110 addition
and
40 deletion
+110
-40
source/libs/scheduler/src/scheduler.c
source/libs/scheduler/src/scheduler.c
+110
-40
未找到文件。
source/libs/scheduler/src/scheduler.c
浏览文件 @
3077a244
...
...
@@ -20,6 +20,73 @@
static
SSchedulerMgmt
schMgmt
=
{
0
};
int32_t
schValidateStatus
(
SSchJob
*
pJob
,
int8_t
oriStatus
,
int8_t
newStatus
)
{
int32_t
code
=
0
;
if
(
oriStatus
==
newStatus
)
{
SCH_ERR_JRET
(
TSDB_CODE_QRY_APP_ERROR
);
}
switch
(
oriStatus
)
{
case
JOB_TASK_STATUS_NULL
:
if
(
newStatus
!=
JOB_TASK_STATUS_EXECUTING
&&
newStatus
!=
JOB_TASK_STATUS_FAILED
&&
newStatus
!=
JOB_TASK_STATUS_NOT_START
)
{
SCH_ERR_JRET
(
TSDB_CODE_QRY_APP_ERROR
);
}
break
;
case
JOB_TASK_STATUS_NOT_START
:
if
(
newStatus
!=
JOB_TASK_STATUS_CANCELLED
)
{
SCH_ERR_JRET
(
TSDB_CODE_QRY_APP_ERROR
);
}
break
;
case
JOB_TASK_STATUS_EXECUTING
:
if
(
newStatus
!=
JOB_TASK_STATUS_PARTIAL_SUCCEED
&&
newStatus
!=
JOB_TASK_STATUS_FAILED
&&
newStatus
!=
JOB_TASK_STATUS_CANCELLING
&&
newStatus
!=
JOB_TASK_STATUS_CANCELLED
&&
newStatus
!=
JOB_TASK_STATUS_DROPPING
)
{
SCH_ERR_JRET
(
TSDB_CODE_QRY_APP_ERROR
);
}
break
;
case
JOB_TASK_STATUS_PARTIAL_SUCCEED
:
if
(
newStatus
!=
JOB_TASK_STATUS_EXECUTING
&&
newStatus
!=
JOB_TASK_STATUS_SUCCEED
&&
newStatus
!=
JOB_TASK_STATUS_CANCELLED
)
{
SCH_ERR_JRET
(
TSDB_CODE_QRY_APP_ERROR
);
}
break
;
case
JOB_TASK_STATUS_SUCCEED
:
case
JOB_TASK_STATUS_FAILED
:
case
JOB_TASK_STATUS_CANCELLING
:
if
(
newStatus
!=
JOB_TASK_STATUS_CANCELLED
)
{
SCH_ERR_JRET
(
TSDB_CODE_QRY_APP_ERROR
);
}
break
;
case
JOB_TASK_STATUS_CANCELLED
:
case
JOB_TASK_STATUS_DROPPING
:
SCH_ERR_JRET
(
TSDB_CODE_QRY_APP_ERROR
);
break
;
default:
qError
(
"invalid task status:%d"
,
oriStatus
);
return
TSDB_CODE_QRY_APP_ERROR
;
}
return
TSDB_CODE_SUCCESS
;
_return:
SCH_JOB_ELOG
(
"invalid job status update, from %d to %d"
,
oriStatus
,
newStatus
);
SCH_ERR_RET
(
code
);
}
int32_t
schBuildTaskRalation
(
SSchJob
*
pJob
,
SHashObj
*
planToTask
)
{
for
(
int32_t
i
=
0
;
i
<
pJob
->
levelNum
;
++
i
)
{
SSchLevel
*
pLevel
=
taosArrayGet
(
pJob
->
levels
,
i
);
...
...
@@ -365,14 +432,21 @@ int32_t schProcessOnJobPartialSuccess(SSchJob *job) {
return
TSDB_CODE_SUCCESS
;
}
int32_t
schProcessOnJobFailure
(
SSchJob
*
job
,
int32_t
errCode
)
{
job
->
status
=
JOB_TASK_STATUS_FAILED
;
job
->
errCode
=
errCode
;
int32_t
schProcessOnJobFailure
(
SSchJob
*
pJob
,
int32_t
errCode
)
{
int8_t
status
=
SCH_GET_JOB_STATUS
(
pJob
);
atomic_val_compare_exchange_32
(
&
job
->
remoteFetch
,
1
,
0
);
if
(
schValidateStatus
(
pJob
,
status
,
JOB_TASK_STATUS_FAILED
))
{
SCH_ERR_RET
(
atomic_load_32
(
&
pJob
->
errCode
));
}
SCH_SET_JOB_STATUS
(
pJob
,
JOB_TASK_STATUS_FAILED
);
atomic_store_32
(
&
pJob
->
errCode
,
errCode
);
if
(
job
->
userFetch
||
((
!
SCH_JOB_NEED_FETCH
(
&
job
->
attr
))
&&
job
->
attr
.
syncSchedule
))
{
tsem_post
(
&
job
->
rspSem
);
atomic_val_compare_exchange_32
(
&
pJob
->
remoteFetch
,
1
,
0
);
if
(
pJob
->
userFetch
||
((
!
SCH_JOB_NEED_FETCH
(
&
pJob
->
attr
))
&&
pJob
->
attr
.
syncSchedule
))
{
tsem_post
(
&
pJob
->
rspSem
);
}
return
TSDB_CODE_SUCCESS
;
...
...
@@ -670,13 +744,13 @@ int32_t schAsyncSendMsg(void *transport, SEpSet* epSet, uint64_t qId, uint64_t t
int32_t
code
=
0
;
SMsgSendInfo
*
pMsgSendInfo
=
calloc
(
1
,
sizeof
(
SMsgSendInfo
));
if
(
NULL
==
pMsgSendInfo
)
{
qError
(
"
calloc %d failed"
,
(
int32_t
)
sizeof
(
SMsgSendInfo
));
qError
(
"
QID:%"
PRIx64
",TID:%"
PRIx64
" calloc %d failed"
,
qId
,
tId
,
(
int32_t
)
sizeof
(
SMsgSendInfo
));
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
SSchCallbackParam
*
param
=
calloc
(
1
,
sizeof
(
SSchCallbackParam
));
if
(
NULL
==
param
)
{
qError
(
"
calloc %d failed"
,
(
int32_t
)
sizeof
(
SSchCallbackParam
));
qError
(
"
QID:%"
PRIx64
",TID:%"
PRIx64
" calloc %d failed"
,
qId
,
tId
,
(
int32_t
)
sizeof
(
SSchCallbackParam
));
SCH_ERR_JRET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
...
...
@@ -694,11 +768,13 @@ int32_t schAsyncSendMsg(void *transport, SEpSet* epSet, uint64_t qId, uint64_t t
pMsgSendInfo
->
fp
=
fp
;
int64_t
transporterId
=
0
;
SCH_ERR_JRET
(
asyncSendMsgToServer
(
transport
,
epSet
,
&
transporterId
,
pMsgSendInfo
));
return
TSDB_CODE_SUCCESS
;
_return:
tfree
(
param
);
tfree
(
pMsgSendInfo
);
...
...
@@ -720,35 +796,31 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType) {
uint32_t
msgSize
=
0
;
void
*
msg
=
NULL
;
int32_t
code
=
0
;
SEpSet
epSet
;
SQueryNodeAddr
*
addr
=
taosArrayGet
(
pTask
->
candidateAddrs
,
pTask
->
candidateIdx
);
schConvertAddrToEpSet
(
addr
,
&
epSet
);
switch
(
msgType
)
{
case
TDMT_VND_CREATE_TABLE
:
case
TDMT_VND_SUBMIT
:
{
if
(
NULL
==
pTask
->
msg
||
pTask
->
msgLen
<=
0
)
{
qError
(
"submit msg is NULL"
);
SCH_ERR_RET
(
TSDB_CODE_SCH_INTERNAL_ERROR
);
}
msgSize
=
pTask
->
msgLen
;
msg
=
pTask
->
msg
;
break
;
}
case
TDMT_VND_QUERY
:
{
if
(
NULL
==
pTask
->
msg
)
{
qError
(
"query msg is NULL"
);
SCH_ERR_RET
(
TSDB_CODE_SCH_INTERNAL_ERROR
);
}
msgSize
=
sizeof
(
SSubQueryMsg
)
+
pTask
->
msgLen
;
msg
=
calloc
(
1
,
msgSize
);
if
(
NULL
==
msg
)
{
qError
(
"calloc %d failed"
,
msgSize
);
SCH_TASK_ELOG
(
"calloc %d failed"
,
msgSize
);
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
SSubQueryMsg
*
pMsg
=
msg
;
pMsg
->
header
.
vgId
=
htonl
(
pTask
->
plan
->
execNode
.
nodeId
);
pMsg
->
header
.
vgId
=
htonl
(
addr
->
nodeId
);
pMsg
->
sId
=
htobe64
(
schMgmt
.
sId
);
pMsg
->
queryId
=
htobe64
(
pJob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
pTask
->
taskId
);
...
...
@@ -760,32 +832,31 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType) {
msgSize
=
sizeof
(
SResReadyMsg
);
msg
=
calloc
(
1
,
msgSize
);
if
(
NULL
==
msg
)
{
qError
(
"calloc %d failed"
,
msgSize
);
SCH_TASK_ELOG
(
"calloc %d failed"
,
msgSize
);
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
SResReadyMsg
*
pMsg
=
msg
;
pMsg
->
header
.
vgId
=
htonl
(
pTask
->
plan
->
execNode
.
nodeId
);
pMsg
->
header
.
vgId
=
htonl
(
addr
->
nodeId
);
pMsg
->
sId
=
htobe64
(
schMgmt
.
sId
);
pMsg
->
queryId
=
htobe64
(
pJob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
pTask
->
taskId
);
break
;
}
case
TDMT_VND_FETCH
:
{
if
(
NULL
==
pTask
)
{
SCH_ERR_RET
(
TSDB_CODE_QRY_APP_ERROR
);
}
msgSize
=
sizeof
(
SResFetchMsg
);
msg
=
calloc
(
1
,
msgSize
);
if
(
NULL
==
msg
)
{
qError
(
"calloc %d failed"
,
msgSize
);
SCH_TASK_ELOG
(
"calloc %d failed"
,
msgSize
);
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
SResFetchMsg
*
pMsg
=
msg
;
pMsg
->
header
.
vgId
=
htonl
(
pTask
->
plan
->
execNode
.
nodeId
);
pMsg
->
header
.
vgId
=
htonl
(
addr
->
nodeId
);
pMsg
->
sId
=
htobe64
(
schMgmt
.
sId
);
pMsg
->
queryId
=
htobe64
(
pJob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
pTask
->
taskId
);
...
...
@@ -795,28 +866,25 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType) {
msgSize
=
sizeof
(
STaskDropMsg
);
msg
=
calloc
(
1
,
msgSize
);
if
(
NULL
==
msg
)
{
qError
(
"calloc %d failed"
,
msgSize
);
SCH_TASK_ELOG
(
"calloc %d failed"
,
msgSize
);
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
STaskDropMsg
*
pMsg
=
msg
;
pMsg
->
header
.
vgId
=
htonl
(
pTask
->
plan
->
execNode
.
nodeId
);
pMsg
->
header
.
vgId
=
htonl
(
addr
->
nodeId
);
pMsg
->
sId
=
htobe64
(
schMgmt
.
sId
);
pMsg
->
queryId
=
htobe64
(
pJob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
pTask
->
taskId
);
break
;
}
default:
qError
(
"unknown msg type:%d"
,
msgType
);
SCH_TASK_ELOG
(
"unknown msg type:%d"
,
msgType
);
SCH_ERR_RET
(
TSDB_CODE_SCH_INTERNAL_ERROR
);
break
;
}
SEpSet
epSet
;
SQueryNodeAddr
*
addr
=
taosArrayGet
(
pTask
->
candidateAddrs
,
pTask
->
candidateIdx
);
schConvertAddrToEpSet
(
addr
,
&
epSet
);
SCH_ERR_JRET
(
schAsyncSendMsg
(
pJob
->
transport
,
&
epSet
,
pJob
->
queryId
,
pTask
->
taskId
,
msgType
,
msg
,
msgSize
));
...
...
@@ -844,7 +912,7 @@ int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) {
if
(
schJobNeedToStop
(
pJob
,
&
status
))
{
SCH_TASK_ELOG
(
"no need to launch task cause of job status, job status:%d"
,
status
);
SCH_ERR_RET
(
pJob
->
errCode
);
SCH_ERR_RET
(
atomic_load_32
(
&
pJob
->
errCode
)
);
}
SSubplan
*
plan
=
pTask
->
plan
;
...
...
@@ -860,9 +928,11 @@ int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) {
SCH_ERR_JRET
(
schSetTaskCandidateAddrs
(
pJob
,
pTask
));
// NOTE: race condition: the task should be put into the hash table before send msg to server
SCH_ERR_JRET
(
schPushTaskToExecList
(
pJob
,
pTask
));
if
(
SCH_GET_TASK_STATUS
(
pTask
)
!=
JOB_TASK_STATUS_EXECUTING
)
{
SCH_ERR_JRET
(
schPushTaskToExecList
(
pJob
,
pTask
));
SCH_SET_TASK_STATUS
(
pTask
,
JOB_TASK_STATUS_EXECUTING
);
SCH_SET_TASK_STATUS
(
pTask
,
JOB_TASK_STATUS_EXECUTING
);
}
SCH_ERR_JRET
(
schBuildAndSendMsg
(
pJob
,
pTask
,
plan
->
msgType
));
...
...
@@ -1031,7 +1101,7 @@ int32_t scheduleExecJob(void *transport, SArray *nodeList, SQueryDag* pDag, void
SSchJob
*
job
=
*
(
SSchJob
**
)
pJob
;
pRes
->
code
=
job
->
errCode
;
pRes
->
code
=
atomic_load_32
(
&
job
->
errCode
)
;
pRes
->
numOfRows
=
job
->
resNumOfRows
;
return
TSDB_CODE_SUCCESS
;
...
...
@@ -1061,7 +1131,7 @@ int32_t scheduleFetchRows(void *pJob, void **data) {
if
(
job
->
status
==
JOB_TASK_STATUS_FAILED
)
{
job
->
res
=
NULL
;
SCH_RET
(
job
->
errCode
);
SCH_RET
(
atomic_load_32
(
&
job
->
errCode
)
);
}
if
(
job
->
status
==
JOB_TASK_STATUS_SUCCEED
)
{
...
...
@@ -1081,7 +1151,7 @@ int32_t scheduleFetchRows(void *pJob, void **data) {
tsem_wait
(
&
job
->
rspSem
);
if
(
job
->
status
==
JOB_TASK_STATUS_FAILED
)
{
code
=
job
->
errCode
;
code
=
atomic_load_32
(
&
job
->
errCode
)
;
}
if
(
job
->
res
&&
((
SRetrieveTableRsp
*
)
job
->
res
)
->
completed
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录