Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
30556c67
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1187
Star
22018
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
30556c67
编写于
1月 08, 2022
作者:
D
dapan1121
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feature/qnode
上级
896df2bd
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
89 addition
and
48 deletion
+89
-48
source/libs/scheduler/inc/schedulerInt.h
source/libs/scheduler/inc/schedulerInt.h
+6
-0
source/libs/scheduler/src/scheduler.c
source/libs/scheduler/src/scheduler.c
+83
-48
未找到文件。
source/libs/scheduler/inc/schedulerInt.h
浏览文件 @
30556c67
...
...
@@ -113,6 +113,12 @@ typedef struct SSchJob {
#define SCH_IS_DATA_SRC_TASK(task) ((task)->plan->type == QUERY_TYPE_SCAN)
#define SCH_TASK_NEED_WAIT_ALL(task) ((task)->plan->type == QUERY_TYPE_MODIFY)
#define SCH_SET_TASK_STATUS(task, st) atomic_store_8(&(task)->status, st)
#define SCH_GET_TASK_STATUS(task) atomic_load_8(&(task)->status)
#define SCH_SET_JOB_STATUS(job, st) atomic_store_8(&(job)->status, st)
#define SCH_GET_JOB_STATUS(job) atomic_load_8(&(job)->status)
#define SCH_SET_JOB_TYPE(pAttr, type) (pAttr)->queryJob = ((type) != QUERY_TYPE_MODIFY)
#define SCH_JOB_NEED_FETCH(pAttr) ((pAttr)->queryJob)
...
...
source/libs/scheduler/src/scheduler.c
浏览文件 @
30556c67
...
...
@@ -100,7 +100,7 @@ int32_t schBuildTaskRalation(SSchJob *pJob, SHashObj *planToTask) {
int32_t
schInitTask
(
SSchJob
*
pJob
,
SSchTask
*
pTask
,
SSubplan
*
pPlan
,
SSchLevel
*
pLevel
)
{
pTask
->
plan
=
pPlan
;
pTask
->
level
=
pLevel
;
pTask
->
status
=
JOB_TASK_STATUS_NOT_START
;
SCH_SET_TASK_STATUS
(
pTask
,
JOB_TASK_STATUS_NOT_START
)
;
pTask
->
taskId
=
atomic_add_fetch_64
(
&
schMgmt
.
taskId
,
1
);
return
TSDB_CODE_SUCCESS
;
...
...
@@ -236,7 +236,7 @@ int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) {
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
SCH_TASK_DLOG
(
"use execNode from plan as candidate addr
"
);
SCH_TASK_DLOG
(
"use execNode from plan as candidate addr
, numOfEps:%d"
,
pTask
->
plan
->
execNode
.
numOfEps
);
return
TSDB_CODE_SUCCESS
;
}
...
...
@@ -273,13 +273,19 @@ int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) {
}
int32_t
schPushTaskToExecList
(
SSchJob
*
pJob
,
SSchTask
*
pTask
)
{
if
(
0
!=
taosHashPut
(
pJob
->
execTasks
,
&
pTask
->
taskId
,
sizeof
(
pTask
->
taskId
),
&
pTask
,
POINTER_BYTES
))
{
qError
(
"failed to add new task, taskId:0x%"
PRIx64
", reqId:0x"
PRIx64
", out of memory"
,
pJob
->
queryId
);
int32_t
code
=
taosHashPut
(
pJob
->
execTasks
,
&
pTask
->
taskId
,
sizeof
(
pTask
->
taskId
),
&
pTask
,
POINTER_BYTES
);
if
(
0
!=
code
)
{
if
(
HASH_NODE_EXIST
(
code
))
{
SCH_TASK_ELOG
(
"task already in exec list, code:%x"
,
code
);
SCH_ERR_RET
(
TSDB_CODE_SCH_INTERNAL_ERROR
);
}
SCH_TASK_ELOG
(
"taosHashPut task to exec list failed, errno:%d"
,
errno
);
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
qDebug
(
"add one task, taskId:0x%"
PRIx64
", numOfTasks:%d, reqId:0x%"
PRIx64
,
pTask
->
taskId
,
taosHashGetSize
(
pJob
->
execTasks
),
pJob
->
queryId
);
SCH_TASK_DLOG
(
"task added to exec list, numOfTasks:%d"
,
taosHashGetSize
(
pJob
->
execTasks
));
return
TSDB_CODE_SUCCESS
;
}
...
...
@@ -387,7 +393,7 @@ int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) {
return
TSDB_CODE_SUCCESS
;
}
pTask
->
status
=
JOB_TASK_STATUS_SUCCEED
;
SCH_SET_TASK_STATUS
(
pTask
,
JOB_TASK_STATUS_SUCCEED
)
;
int32_t
parentNum
=
pTask
->
parents
?
(
int32_t
)
taosArrayGetSize
(
pTask
->
parents
)
:
0
;
if
(
parentNum
==
0
)
{
...
...
@@ -460,10 +466,14 @@ int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode)
if
(
!
needRetry
)
{
SCH_TASK_ELOG
(
"task failed[%x], no more retry"
,
errCode
);
SCH_ERR_RET
(
schMoveTaskToFailList
(
pJob
,
pTask
,
&
moved
));
if
(
!
moved
)
{
SCH_TASK_ELOG
(
"task may already moved, status:%d"
,
pTask
->
status
);
}
if
(
SCH_GET_TASK_STATUS
(
pTask
)
==
JOB_TASK_STATUS_EXECUTING
)
{
SCH_ERR_RET
(
schMoveTaskToFailList
(
pJob
,
pTask
,
&
moved
));
if
(
!
moved
)
{
SCH_TASK_ELOG
(
"task may already moved, status:%d"
,
pTask
->
status
);
}
}
SCH_SET_TASK_STATUS
(
pTask
,
JOB_TASK_STATUS_FAILED
);
if
(
SCH_TASK_NEED_WAIT_ALL
(
pTask
))
{
SCH_LOCK
(
SCH_WRITE
,
&
pTask
->
level
->
lock
);
...
...
@@ -476,11 +486,10 @@ int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode)
return
TSDB_CODE_SUCCESS
;
}
}
pJob
->
status
=
JOB_TASK_STATUS_FAILED
;
SCH_ERR_RET
(
schProcessOnJobFailure
(
pJob
,
errCode
));
return
TSDB_CODE_SUCCESS
;
return
errCode
;
}
SCH_ERR_RET
(
schLaunchTask
(
pJob
,
pTask
));
...
...
@@ -706,7 +715,7 @@ void schConvertAddrToEpSet(SQueryNodeAddr *addr, SEpSet *epSet) {
}
int32_t
schBuildAndSendMsg
(
SSchJob
*
job
,
SSchTask
*
t
ask
,
int32_t
msgType
)
{
int32_t
schBuildAndSendMsg
(
SSchJob
*
pJob
,
SSchTask
*
pT
ask
,
int32_t
msgType
)
{
uint32_t
msgSize
=
0
;
void
*
msg
=
NULL
;
int32_t
code
=
0
;
...
...
@@ -714,22 +723,22 @@ int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType) {
switch
(
msgType
)
{
case
TDMT_VND_CREATE_TABLE
:
case
TDMT_VND_SUBMIT
:
{
if
(
NULL
==
task
->
msg
||
t
ask
->
msgLen
<=
0
)
{
if
(
NULL
==
pTask
->
msg
||
pT
ask
->
msgLen
<=
0
)
{
qError
(
"submit msg is NULL"
);
SCH_ERR_RET
(
TSDB_CODE_SCH_INTERNAL_ERROR
);
}
msgSize
=
t
ask
->
msgLen
;
msg
=
t
ask
->
msg
;
msgSize
=
pT
ask
->
msgLen
;
msg
=
pT
ask
->
msg
;
break
;
}
case
TDMT_VND_QUERY
:
{
if
(
NULL
==
t
ask
->
msg
)
{
if
(
NULL
==
pT
ask
->
msg
)
{
qError
(
"query msg is NULL"
);
SCH_ERR_RET
(
TSDB_CODE_SCH_INTERNAL_ERROR
);
}
msgSize
=
sizeof
(
SSubQueryMsg
)
+
t
ask
->
msgLen
;
msgSize
=
sizeof
(
SSubQueryMsg
)
+
pT
ask
->
msgLen
;
msg
=
calloc
(
1
,
msgSize
);
if
(
NULL
==
msg
)
{
qError
(
"calloc %d failed"
,
msgSize
);
...
...
@@ -738,12 +747,12 @@ int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType) {
SSubQueryMsg
*
pMsg
=
msg
;
pMsg
->
header
.
vgId
=
htonl
(
t
ask
->
plan
->
execNode
.
nodeId
);
pMsg
->
header
.
vgId
=
htonl
(
pT
ask
->
plan
->
execNode
.
nodeId
);
pMsg
->
sId
=
htobe64
(
schMgmt
.
sId
);
pMsg
->
queryId
=
htobe64
(
j
ob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
t
ask
->
taskId
);
pMsg
->
contentLen
=
htonl
(
t
ask
->
msgLen
);
memcpy
(
pMsg
->
msg
,
task
->
msg
,
t
ask
->
msgLen
);
pMsg
->
queryId
=
htobe64
(
pJ
ob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
pT
ask
->
taskId
);
pMsg
->
contentLen
=
htonl
(
pT
ask
->
msgLen
);
memcpy
(
pMsg
->
msg
,
pTask
->
msg
,
pT
ask
->
msgLen
);
break
;
}
case
TDMT_VND_RES_READY
:
{
...
...
@@ -756,14 +765,14 @@ int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType) {
SResReadyMsg
*
pMsg
=
msg
;
pMsg
->
header
.
vgId
=
htonl
(
t
ask
->
plan
->
execNode
.
nodeId
);
pMsg
->
header
.
vgId
=
htonl
(
pT
ask
->
plan
->
execNode
.
nodeId
);
pMsg
->
sId
=
htobe64
(
schMgmt
.
sId
);
pMsg
->
queryId
=
htobe64
(
j
ob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
t
ask
->
taskId
);
pMsg
->
queryId
=
htobe64
(
pJ
ob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
pT
ask
->
taskId
);
break
;
}
case
TDMT_VND_FETCH
:
{
if
(
NULL
==
t
ask
)
{
if
(
NULL
==
pT
ask
)
{
SCH_ERR_RET
(
TSDB_CODE_QRY_APP_ERROR
);
}
msgSize
=
sizeof
(
SResFetchMsg
);
...
...
@@ -775,10 +784,10 @@ int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType) {
SResFetchMsg
*
pMsg
=
msg
;
pMsg
->
header
.
vgId
=
htonl
(
t
ask
->
plan
->
execNode
.
nodeId
);
pMsg
->
header
.
vgId
=
htonl
(
pT
ask
->
plan
->
execNode
.
nodeId
);
pMsg
->
sId
=
htobe64
(
schMgmt
.
sId
);
pMsg
->
queryId
=
htobe64
(
j
ob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
t
ask
->
taskId
);
pMsg
->
queryId
=
htobe64
(
pJ
ob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
pT
ask
->
taskId
);
break
;
}
case
TDMT_VND_DROP_TASK
:{
...
...
@@ -791,10 +800,10 @@ int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType) {
STaskDropMsg
*
pMsg
=
msg
;
pMsg
->
header
.
vgId
=
htonl
(
t
ask
->
plan
->
execNode
.
nodeId
);
pMsg
->
header
.
vgId
=
htonl
(
pT
ask
->
plan
->
execNode
.
nodeId
);
pMsg
->
sId
=
htobe64
(
schMgmt
.
sId
);
pMsg
->
queryId
=
htobe64
(
j
ob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
t
ask
->
taskId
);
pMsg
->
queryId
=
htobe64
(
pJ
ob
->
queryId
);
pMsg
->
taskId
=
htobe64
(
pT
ask
->
taskId
);
break
;
}
default:
...
...
@@ -804,11 +813,11 @@ int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType) {
}
SEpSet
epSet
;
SQueryNodeAddr
*
addr
=
taosArrayGet
(
task
->
candidateAddrs
,
t
ask
->
candidateIdx
);
SQueryNodeAddr
*
addr
=
taosArrayGet
(
pTask
->
candidateAddrs
,
pT
ask
->
candidateIdx
);
schConvertAddrToEpSet
(
addr
,
&
epSet
);
SCH_ERR_JRET
(
schAsyncSendMsg
(
job
->
transport
,
&
epSet
,
job
->
queryId
,
t
ask
->
taskId
,
msgType
,
msg
,
msgSize
));
SCH_ERR_JRET
(
schAsyncSendMsg
(
pJob
->
transport
,
&
epSet
,
pJob
->
queryId
,
pT
ask
->
taskId
,
msgType
,
msg
,
msgSize
));
return
TSDB_CODE_SUCCESS
;
...
...
@@ -818,25 +827,51 @@ _return:
SCH_RET
(
code
);
}
static
FORCE_INLINE
bool
schJobNeedToStop
(
SSchJob
*
pJob
,
int8_t
*
pStatus
)
{
int8_t
status
=
SCH_GET_JOB_STATUS
(
pJob
);
if
(
pStatus
)
{
*
pStatus
=
status
;
}
return
(
status
==
JOB_TASK_STATUS_FAILED
||
status
==
JOB_TASK_STATUS_CANCELLED
||
status
==
JOB_TASK_STATUS_CANCELLING
||
status
==
JOB_TASK_STATUS_DROPPING
);
}
int32_t
schLaunchTask
(
SSchJob
*
pJob
,
SSchTask
*
pTask
)
{
SSubplan
*
plan
=
pTask
->
plan
;
int8_t
status
=
0
;
int32_t
code
=
0
;
if
(
schJobNeedToStop
(
pJob
,
&
status
))
{
SCH_TASK_ELOG
(
"no need to launch task cause of job status, job status:%d"
,
status
);
SCH_ERR_RET
(
pJob
->
errCode
);
}
SCH_ERR_RET
(
qSubPlanToString
(
plan
,
&
pTask
->
msg
,
&
pTask
->
msgLen
));
SCH_ERR_RET
(
schSetTaskCandidateAddrs
(
pJob
,
pTask
));
SSubplan
*
plan
=
pTask
->
plan
;
if
(
NULL
==
pTask
->
candidateAddrs
||
taosArrayGetSize
(
pTask
->
candidateAddrs
)
<=
0
)
{
SCH_TASK_ELOG
(
"no valid candidate node for task:%"
PRIx64
,
pTask
->
taskId
);
SCH_ERR_RET
(
TSDB_CODE_SCH_INTERNAL_ERROR
);
if
(
NULL
==
pTask
->
msg
)
{
code
=
qSubPlanToString
(
plan
,
&
pTask
->
msg
,
&
pTask
->
msgLen
);
if
(
TSDB_CODE_SUCCESS
!=
code
||
NULL
==
pTask
->
msg
||
pTask
->
msgLen
<=
0
)
{
SCH_TASK_ELOG
(
"subplanToString error, code:%x, msg:%p, len:%d"
,
code
,
pTask
->
msg
,
pTask
->
msgLen
);
SCH_ERR_JRET
(
code
);
}
}
SCH_ERR_JRET
(
schSetTaskCandidateAddrs
(
pJob
,
pTask
));
// NOTE: race condition: the task should be put into the hash table before send msg to server
SCH_ERR_RET
(
schPushTaskToExecList
(
pJob
,
pTask
));
SCH_ERR_RET
(
schBuildAndSendMsg
(
pJob
,
pTask
,
plan
->
msgType
));
SCH_ERR_JRET
(
schPushTaskToExecList
(
pJob
,
pTask
));
SCH_SET_TASK_STATUS
(
pTask
,
JOB_TASK_STATUS_EXECUTING
);
pTask
->
status
=
JOB_TASK_STATUS_EXECUTING
;
SCH_ERR_JRET
(
schBuildAndSendMsg
(
pJob
,
pTask
,
plan
->
msgType
))
;
return
TSDB_CODE_SUCCESS
;
_return:
code
=
schProcessOnTaskFailure
(
pJob
,
pTask
,
code
);
SCH_RET
(
code
);
}
int32_t
schLaunchJob
(
SSchJob
*
pJob
)
{
...
...
@@ -936,11 +971,11 @@ int32_t schExecJobImpl(void *transport, SArray *nodeList, SQueryDag* pDag, void*
*
(
SSchJob
**
)
job
=
pJob
;
if
(
syncSchedule
)
{
SCH_JOB_DLOG
(
"will wait for rsp now
"
);
SCH_JOB_DLOG
(
"will wait for rsp now
, job status:%d"
,
SCH_GET_JOB_STATUS
(
pJob
)
);
tsem_wait
(
&
pJob
->
rspSem
);
}
SCH_JOB_DLOG
(
"job exec done
"
);
SCH_JOB_DLOG
(
"job exec done
, job status:%d"
,
SCH_GET_JOB_STATUS
(
pJob
)
);
return
TSDB_CODE_SUCCESS
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录