Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
99d602b8
TDengine
项目概览
taosdata
/
TDengine
大约 2 年 前同步成功
通知
1192
Star
22018
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
99d602b8
编写于
1月 07, 2022
作者:
D
dapan1121
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
schduler add debug info
上级
b3c24f6e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
100 addition
and
87 deletion
+100
-87
include/libs/scheduler/scheduler.h
include/libs/scheduler/scheduler.h
+1
-1
source/libs/scheduler/inc/schedulerInt.h
source/libs/scheduler/inc/schedulerInt.h
+23
-19
source/libs/scheduler/src/scheduler.c
source/libs/scheduler/src/scheduler.c
+76
-67
未找到文件。
include/libs/scheduler/scheduler.h
浏览文件 @
99d602b8
...
...
@@ -24,7 +24,7 @@ extern "C" {
#include "catalog.h"
typedef
struct
SSchedulerCfg
{
int32_t
maxJobNum
;
u
int32_t
maxJobNum
;
}
SSchedulerCfg
;
typedef
struct
SQueryProfileSummary
{
...
...
source/libs/scheduler/inc/schedulerInt.h
浏览文件 @
99d602b8
...
...
@@ -37,10 +37,10 @@ enum {
};
typedef
struct
SSchedulerMgmt
{
uint64_t
taskId
;
uint64_t
sId
;
uint64_t
taskId
;
// sequential taksId
uint64_t
sId
;
// schedulerId
SSchedulerCfg
cfg
;
SHashObj
*
jobs
;
// key: queryId, value: SQueryJob*
SHashObj
*
jobs
;
// key: queryId, value: SQueryJob*
}
SSchedulerMgmt
;
typedef
struct
SSchCallbackParam
{
...
...
@@ -83,30 +83,29 @@ typedef struct SSchJobAttr {
typedef
struct
SSchJob
{
uint64_t
queryId
;
SSchJobAttr
attr
;
int32_t
levelNum
;
void
*
transport
;
SArray
*
nodeList
;
// qnode/vnode list, element is SQueryNodeAddr
SArray
*
levels
;
// Element is SQueryLevel, starting from 0. SArray<SSchLevel>
SArray
*
subPlans
;
// subplan pointer copied from DAG, no need to free it in scheduler
int32_t
levelIdx
;
int8_t
status
;
SSchJobAttr
attr
;
SEpSet
dataSrcEps
;
SHashObj
*
execTasks
;
// executing tasks, key:taskid, value:SQueryTask*
SHashObj
*
succTasks
;
// succeed tasks, key:taskid, value:SQueryTask*
SHashObj
*
failTasks
;
// failed tasks, key:taskid, value:SQueryTask*
int8_t
status
;
SEpAddr
resEp
;
void
*
transport
;
SArray
*
nodeList
;
// qnode/vnode list, element is SQueryNodeAddr
tsem_t
rspSem
;
int32_t
userFetch
;
int32_t
remoteFetch
;
SSchTask
*
fetchTask
;
int32_t
errCode
;
void
*
res
;
int32_t
resNumOfRows
;
SHashObj
*
execTasks
;
// executing tasks, key:taskid, value:SQueryTask*
SHashObj
*
succTasks
;
// succeed tasks, key:taskid, value:SQueryTask*
SHashObj
*
failTasks
;
// failed tasks, key:taskid, value:SQueryTask*
SArray
*
levels
;
// Element is SQueryLevel, starting from 0. SArray<SSchLevel>
SArray
*
subPlans
;
// Element is SArray*, and nested element is SSubplan. The execution level of subplan, starting from 0. SArray<void*>
SQueryProfileSummary
summary
;
}
SSchJob
;
...
...
@@ -115,12 +114,17 @@ typedef struct SSchJob {
#define SCH_IS_DATA_SRC_TASK(task) ((task)->plan->type == QUERY_TYPE_SCAN)
#define SCH_TASK_NEED_WAIT_ALL(task) ((task)->plan->type == QUERY_TYPE_MODIFY)
#define SCH_JOB_ERR_LOG(param, ...) qError("QID:%"PRIx64 param, job->queryId, __VA_ARGS__)
#define SCH_TASK_ERR_LOG(param, ...) qError("QID:%"PRIx64",TID:%"PRIx64 param, job->queryId, task->taskId, __VA_ARGS__)
#define SCH_SET_JOB_TYPE(pAttr, type) (pAttr)->queryJob = ((type) != QUERY_TYPE_MODIFY)
#define SCH_JOB_NEED_FETCH(pAttr) ((pAttr)->queryJob)
#define SCH_JOB_ELOG(param, ...) qError("QID:%"PRIx64" " param, pJob->queryId, __VA_ARGS__)
#define SCH_JOB_DLOG(param, ...) qDebug("QID:%"PRIx64" " param, pJob->queryId, __VA_ARGS__)
#define SCH_TASK_ELOG(param, ...) qError("QID:%"PRIx64",TID:%"PRIx64" " param, pJob->queryId, pTask->taskId, __VA_ARGS__)
#define SCH_TASK_DLOG(param, ...) qDebug("QID:%"PRIx64",TID:%"PRIx64" " param, pJob->queryId, pTask->taskId, __VA_ARGS__)
#define SCH_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0)
#define SCH_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0)
#define SCH_ERR_LRET(c,...) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { qError(__VA_ARGS__); terrno = _code; return _code; } } while (0)
#define SCH_ERR_JRET(c) do { code = c; if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _return; } } while (0)
#define SCH_LOCK(type, _lock) (SCH_READ == (type) ? taosRLockLatch(_lock) : taosWLockLatch(_lock))
...
...
source/libs/scheduler/src/scheduler.c
浏览文件 @
99d602b8
...
...
@@ -112,87 +112,87 @@ static void cleanupTask(SSchTask* pTask) {
taosArrayDestroy
(
pTask
->
candidateAddrs
);
}
int32_t
schValidateAndBuildJob
(
SQueryDag
*
dag
,
SSchJob
*
pJ
ob
)
{
int32_t
schValidateAndBuildJob
(
SQueryDag
*
dag
,
SSchJob
*
j
ob
)
{
int32_t
code
=
0
;
pJ
ob
->
queryId
=
dag
->
queryId
;
j
ob
->
queryId
=
dag
->
queryId
;
if
(
dag
->
numOfSubplans
<=
0
)
{
qError
(
"invalid subplan num:%d"
,
dag
->
numOfSubplans
);
SCH_JOB_ELOG
(
"invalid subplan num:%d"
,
dag
->
numOfSubplans
);
SCH_ERR_RET
(
TSDB_CODE_QRY_INVALID_INPUT
);
}
int32_t
levelNum
=
(
int32_t
)
taosArrayGetSize
(
dag
->
pSubplans
);
if
(
levelNum
<=
0
)
{
qError
(
"invalid level num:%d"
,
levelNum
);
SCH_JOB_ELOG
(
"invalid level num:%d"
,
levelNum
);
SCH_ERR_RET
(
TSDB_CODE_QRY_INVALID_INPUT
);
}
SHashObj
*
planToTask
=
taosHashInit
(
SCHEDULE_DEFAULT_TASK_NUMBER
,
taosGetDefaultHashFunction
(
POINTER_BYTES
==
sizeof
(
int64_t
)
?
TSDB_DATA_TYPE_BIGINT
:
TSDB_DATA_TYPE_INT
),
false
,
HASH_NO_LOCK
);
if
(
NULL
==
planToTask
)
{
qError
(
"taosHashInit %d failed"
,
SCHEDULE_DEFAULT_TASK_NUMBER
);
SCH_JOB_ELOG
(
"taosHashInit %d failed"
,
SCHEDULE_DEFAULT_TASK_NUMBER
);
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
pJ
ob
->
levels
=
taosArrayInit
(
levelNum
,
sizeof
(
SSchLevel
));
if
(
NULL
==
pJ
ob
->
levels
)
{
qError
(
"taosArrayInit %d failed"
,
levelNum
);
j
ob
->
levels
=
taosArrayInit
(
levelNum
,
sizeof
(
SSchLevel
));
if
(
NULL
==
j
ob
->
levels
)
{
SCH_JOB_ELOG
(
"taosArrayInit %d failed"
,
levelNum
);
SCH_ERR_JRET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
//??
pJob
->
attr
.
needFetch
=
true
;
job
->
attr
.
needFetch
=
true
;
pJ
ob
->
levelNum
=
levelNum
;
pJ
ob
->
levelIdx
=
levelNum
-
1
;
j
ob
->
levelNum
=
levelNum
;
j
ob
->
levelIdx
=
levelNum
-
1
;
pJ
ob
->
subPlans
=
dag
->
pSubplans
;
j
ob
->
subPlans
=
dag
->
pSubplans
;
SSchLevel
level
=
{
0
};
SArray
*
levelP
lans
=
NULL
;
int32_t
levelPlan
Num
=
0
;
SArray
*
p
lans
=
NULL
;
int32_t
task
Num
=
0
;
SSchLevel
*
pLevel
=
NULL
;
level
.
status
=
JOB_TASK_STATUS_NOT_START
;
for
(
int32_t
i
=
0
;
i
<
levelNum
;
++
i
)
{
if
(
NULL
==
taosArrayPush
(
pJ
ob
->
levels
,
&
level
))
{
qError
(
"taosArrayPush failed"
);
if
(
NULL
==
taosArrayPush
(
j
ob
->
levels
,
&
level
))
{
SCH_JOB_ELOG
(
"taosArrayPush level failed, level:%d"
,
i
);
SCH_ERR_JRET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
pLevel
=
taosArrayGet
(
pJ
ob
->
levels
,
i
);
pLevel
=
taosArrayGet
(
j
ob
->
levels
,
i
);
pLevel
->
level
=
i
;
levelPlans
=
taosArrayGetP
(
dag
->
pSubplans
,
i
);
if
(
NULL
==
levelPlans
)
{
qError
(
"no level plans for level %d"
,
i
);
plans
=
taosArrayGetP
(
dag
->
pSubplans
,
i
);
if
(
NULL
==
plans
)
{
SCH_JOB_ELOG
(
"empty level plan, level:%d"
,
i
);
SCH_ERR_JRET
(
TSDB_CODE_QRY_INVALID_INPUT
);
}
levelPlanNum
=
(
int32_t
)
taosArrayGetSize
(
levelP
lans
);
if
(
levelPlan
Num
<=
0
)
{
qError
(
"invalid level plans number:%d, level:%d"
,
levelPlan
Num
,
i
);
taskNum
=
(
int32_t
)
taosArrayGetSize
(
p
lans
);
if
(
task
Num
<=
0
)
{
SCH_JOB_ELOG
(
"invalid level plan number:%d, level:%d"
,
task
Num
,
i
);
SCH_ERR_JRET
(
TSDB_CODE_QRY_INVALID_INPUT
);
}
pLevel
->
taskNum
=
levelPlan
Num
;
pLevel
->
taskNum
=
task
Num
;
pLevel
->
subTasks
=
taosArrayInit
(
levelPlan
Num
,
sizeof
(
SSchTask
));
pLevel
->
subTasks
=
taosArrayInit
(
task
Num
,
sizeof
(
SSchTask
));
if
(
NULL
==
pLevel
->
subTasks
)
{
qError
(
"taosArrayInit %d failed"
,
levelPlan
Num
);
SCH_JOB_ELOG
(
"taosArrayInit %d failed"
,
task
Num
);
SCH_ERR_JRET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
for
(
int32_t
n
=
0
;
n
<
levelPlan
Num
;
++
n
)
{
SSubplan
*
plan
=
taosArrayGetP
(
levelP
lans
,
n
);
for
(
int32_t
n
=
0
;
n
<
task
Num
;
++
n
)
{
SSubplan
*
plan
=
taosArrayGetP
(
p
lans
,
n
);
if
(
plan
->
type
==
QUERY_TYPE_MODIFY
)
{
pJ
ob
->
attr
.
needFetch
=
false
;
j
ob
->
attr
.
needFetch
=
false
;
}
else
{
pJ
ob
->
attr
.
queryJob
=
true
;
j
ob
->
attr
.
queryJob
=
true
;
}
SSchTask
task
=
initTask
(
pJ
ob
,
plan
,
pLevel
);
SSchTask
task
=
initTask
(
j
ob
,
plan
,
pLevel
);
void
*
p
=
taosArrayPush
(
pLevel
->
subTasks
,
&
task
);
if
(
NULL
==
p
)
{
qError
(
"taosArrayPush failed"
);
...
...
@@ -206,7 +206,7 @@ int32_t schValidateAndBuildJob(SQueryDag *dag, SSchJob *pJob) {
}
}
SCH_ERR_JRET
(
schBuildTaskRalation
(
pJ
ob
,
planToTask
));
SCH_ERR_JRET
(
schBuildTaskRalation
(
j
ob
,
planToTask
));
if
(
planToTask
)
{
taosHashCleanup
(
planToTask
);
...
...
@@ -384,7 +384,7 @@ int32_t schProcessOnTaskSuccess(SSchJob *job, SSchTask *task) {
SCH_ERR_RET
(
schMoveTaskToSuccList
(
job
,
task
,
&
moved
));
if
(
!
moved
)
{
SCH_TASK_E
RR_
LOG
(
" task may already moved, status:%d"
,
task
->
status
);
SCH_TASK_ELOG
(
" task may already moved, status:%d"
,
task
->
status
);
return
TSDB_CODE_SUCCESS
;
}
...
...
@@ -458,11 +458,11 @@ int32_t schProcessOnTaskFailure(SSchJob *job, SSchTask *task, int32_t errCode) {
SCH_ERR_RET
(
schTaskCheckAndSetRetry
(
job
,
task
,
errCode
,
&
needRetry
));
if
(
!
needRetry
)
{
SCH_TASK_E
RR_
LOG
(
"task failed[%x], no more retry"
,
errCode
);
SCH_TASK_ELOG
(
"task failed[%x], no more retry"
,
errCode
);
SCH_ERR_RET
(
schMoveTaskToFailList
(
job
,
task
,
&
moved
));
if
(
!
moved
)
{
SCH_TASK_E
RR_
LOG
(
"task may already moved, status:%d"
,
task
->
status
);
SCH_TASK_ELOG
(
"task may already moved, status:%d"
,
task
->
status
);
}
if
(
SCH_TASK_NEED_WAIT_ALL
(
task
))
{
...
...
@@ -825,7 +825,7 @@ int32_t schLaunchTask(SSchJob *job, SSchTask *task) {
SCH_ERR_RET
(
schSetTaskCandidateAddrs
(
job
,
task
));
if
(
NULL
==
task
->
candidateAddrs
||
taosArrayGetSize
(
task
->
candidateAddrs
)
<=
0
)
{
SCH_TASK_E
RR_
LOG
(
"no valid candidate node for task:%"
PRIx64
,
task
->
taskId
);
SCH_TASK_ELOG
(
"no valid candidate node for task:%"
PRIx64
,
task
->
taskId
);
SCH_ERR_RET
(
TSDB_CODE_SCH_INTERNAL_ERROR
);
}
...
...
@@ -888,41 +888,16 @@ uint64_t schGenSchId(void) {
}
int32_t
schedulerInit
(
SSchedulerCfg
*
cfg
)
{
if
(
schMgmt
.
jobs
)
{
qError
(
"scheduler already init"
);
return
TSDB_CODE_QRY_INVALID_INPUT
;
}
if
(
cfg
)
{
schMgmt
.
cfg
=
*
cfg
;
if
(
schMgmt
.
cfg
.
maxJobNum
<=
0
)
{
schMgmt
.
cfg
.
maxJobNum
=
SCHEDULE_DEFAULT_JOB_NUMBER
;
}
}
else
{
schMgmt
.
cfg
.
maxJobNum
=
SCHEDULE_DEFAULT_JOB_NUMBER
;
}
schMgmt
.
jobs
=
taosHashInit
(
schMgmt
.
cfg
.
maxJobNum
,
taosGetDefaultHashFunction
(
TSDB_DATA_TYPE_UBIGINT
),
false
,
HASH_ENTRY_LOCK
);
if
(
NULL
==
schMgmt
.
jobs
)
{
SCH_ERR_LRET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
,
"init %d schduler jobs failed"
,
schMgmt
.
cfg
.
maxJobNum
);
}
schMgmt
.
sId
=
schGenSchId
();
return
TSDB_CODE_SUCCESS
;
}
int32_t
scheduleExecJobImpl
(
void
*
transport
,
SArray
*
nodeList
,
SQueryDag
*
pDag
,
void
**
pJob
,
bool
syncSchedule
)
{
int32_t
schExecJobImpl
(
void
*
transport
,
SArray
*
nodeList
,
SQueryDag
*
pDag
,
void
**
pJob
,
bool
syncSchedule
)
{
if
(
nodeList
&&
taosArrayGetSize
(
nodeList
)
<=
0
)
{
qInfo
(
"
qnodeList is empty"
);
qInfo
(
"
QID:%"
PRIx64
" input nodeList is empty"
,
pDag
->
queryId
);
}
int32_t
code
=
0
;
SSchJob
*
job
=
calloc
(
1
,
sizeof
(
SSchJob
));
if
(
NULL
==
job
)
{
qError
(
"QID:%"
PRIx64
" calloc %d failed"
,
sizeof
(
SSchJob
));
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
...
...
@@ -975,18 +950,52 @@ int32_t scheduleExecJobImpl(void *transport, SArray *nodeList, SQueryDag* pDag,
return
TSDB_CODE_SUCCESS
;
_return:
*
(
SSchJob
**
)
pJob
=
NULL
;
scheduleFreeJob
(
job
);
SCH_RET
(
code
);
}
int32_t
schedulerInit
(
SSchedulerCfg
*
cfg
)
{
if
(
schMgmt
.
jobs
)
{
qError
(
"scheduler already initialized"
);
return
TSDB_CODE_QRY_INVALID_INPUT
;
}
if
(
cfg
)
{
schMgmt
.
cfg
=
*
cfg
;
if
(
schMgmt
.
cfg
.
maxJobNum
==
0
)
{
schMgmt
.
cfg
.
maxJobNum
=
SCHEDULE_DEFAULT_JOB_NUMBER
;
}
}
else
{
schMgmt
.
cfg
.
maxJobNum
=
SCHEDULE_DEFAULT_JOB_NUMBER
;
}
schMgmt
.
jobs
=
taosHashInit
(
schMgmt
.
cfg
.
maxJobNum
,
taosGetDefaultHashFunction
(
TSDB_DATA_TYPE_UBIGINT
),
false
,
HASH_ENTRY_LOCK
);
if
(
NULL
==
schMgmt
.
jobs
)
{
qError
(
"init schduler jobs failed, num:%u"
,
schMgmt
.
cfg
.
maxJobNum
);
SCH_ERR_RET
(
TSDB_CODE_QRY_OUT_OF_MEMORY
);
}
if
(
taosGetSystemUUID
(
&
schMgmt
.
sId
,
sizeof
(
schMgmt
.
sId
)))
{
qError
(
"generate schdulerId failed, errno:%d"
,
errno
);
SCH_ERR_RET
(
TSDB_CODE_QRY_SYS_ERROR
);
}
qInfo
(
"scheduler %"
PRIx64
" initizlized, maxJob:%u"
,
schMgmt
.
cfg
.
maxJobNum
);
return
TSDB_CODE_SUCCESS
;
}
int32_t
scheduleExecJob
(
void
*
transport
,
SArray
*
nodeList
,
SQueryDag
*
pDag
,
void
**
pJob
,
SQueryResult
*
pRes
)
{
if
(
NULL
==
transport
||
NULL
==
pDag
||
NULL
==
pDag
->
pSubplans
||
NULL
==
pJob
||
NULL
==
pRes
)
{
SCH_ERR_RET
(
TSDB_CODE_QRY_INVALID_INPUT
);
}
SCH_ERR_RET
(
sch
edule
ExecJobImpl
(
transport
,
nodeList
,
pDag
,
pJob
,
true
));
SCH_ERR_RET
(
schExecJobImpl
(
transport
,
nodeList
,
pDag
,
pJob
,
true
));
SSchJob
*
job
=
*
(
SSchJob
**
)
pJob
;
...
...
@@ -1001,7 +1010,7 @@ int32_t scheduleAsyncExecJob(void *transport, SArray *nodeList, SQueryDag* pDag,
SCH_ERR_RET
(
TSDB_CODE_QRY_INVALID_INPUT
);
}
return
sch
edule
ExecJobImpl
(
transport
,
nodeList
,
pDag
,
pJob
,
false
);
return
schExecJobImpl
(
transport
,
nodeList
,
pDag
,
pJob
,
false
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录