schedulerInt.h 8.1 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _TD_SCHEDULER_INT_H_
#define _TD_SCHEDULER_INT_H_

#ifdef __cplusplus
extern "C" {
#endif

23 24 25 26
#include "os.h"
#include "tarray.h"
#include "planner.h"
#include "scheduler.h"
27
#include "thash.h"
28

D
dapan1121 已提交
29 30
/*
 * Default scheduler limits. Where they are applied is not visible in this
 * header; the names indicate caps on jobs, tasks and per-node table count.
 */
#define SCHEDULE_DEFAULT_MAX_JOB_NUM        1000
#define SCHEDULE_DEFAULT_MAX_TASK_NUM       1000
#define SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM 20  // unit is TSDB_TABLE_NUM_UNIT

33

34
// Maximum number of candidate endpoints; aliased to the replica limit TSDB_MAX_REPLICA.
#define SCH_MAX_CANDIDATE_EP_NUM (TSDB_MAX_REPLICA)
D
dapan 已提交
35

D
dapan1121 已提交
36 37 38 39 40
/* Latch access modes; SCH_LOCK / SCH_UNLOCK compare their `type` argument against SCH_READ. */
enum {
  SCH_READ  = 1,
  SCH_WRITE = 2,
};

D
dapan1121 已提交
41 42 43 44 45
/*
 * Pair of opaque transport pointers.
 * NOTE(review): presumably the transport instance and a per-connection handle;
 * ownership is not visible in this header -- confirm against the users.
 */
typedef struct SSchTrans {
  void *transInst;    // opaque transport instance
  void *transHandle;  // opaque transport handle
} SSchTrans;

D
dapan1121 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
/*
 * Statistics containers. No counters are defined yet, so each group carries a
 * single reserved byte: ISO C leaves a struct without named members undefined,
 * and the GNU extension that accepts one gives it size 0 in C but size 1 in
 * C++, so the layout of anything embedding these would differ between the C
 * and C++ users of this header. Replace `reserved` when real counters arrive.
 */
typedef struct SSchApiStat {
  int8_t reserved;  // placeholder, no API counters yet
} SSchApiStat;

typedef struct SSchRuntimeStat {
  int8_t reserved;  // placeholder, no runtime counters yet
} SSchRuntimeStat;

typedef struct SSchJobStat {
  int8_t reserved;  // placeholder, no job counters yet
} SSchJobStat;

/* Aggregate of the three statistics groups above. */
typedef struct SSchedulerStat {
  SSchApiStat      api;
  SSchRuntimeStat  runtime;
  SSchJobStat      job;
} SSchedulerStat;


65
typedef struct SSchedulerMgmt {
D
dapan1121 已提交
66 67 68
  uint64_t       taskId; // sequential taksId
  uint64_t       sId;    // schedulerId
  SSchedulerCfg  cfg;
D
dapan1121 已提交
69
  int32_t        jobRef;
D
dapan1121 已提交
70
  SSchedulerStat stat;
71
} SSchedulerMgmt;
72

D
dapan1121 已提交
73 74
typedef struct SSchCallbackParam {
  uint64_t queryId;
D
dapan1121 已提交
75
  int64_t  refId;
D
dapan1121 已提交
76
  uint64_t taskId;
D
dapan1121 已提交
77
  SEpSet   epSet;
D
dapan1121 已提交
78 79
} SSchCallbackParam;

D
dapan1121 已提交
80 81
typedef struct SSchFlowControl {
  SRWLatch  lock;
D
dapan1121 已提交
82
  bool      sorted;
D
dapan 已提交
83
  int32_t   tableNumSum;
D
dapan1121 已提交
84
  uint32_t  execTaskNum;
D
dapan1121 已提交
85
  SArray   *taskList;      // Element is SSchTask*
D
dapan1121 已提交
86 87
} SSchFlowControl;

D
dapan 已提交
88
typedef struct SSchLevel {
D
dapan1121 已提交
89 90 91 92 93 94 95 96 97
  int32_t         level;
  int8_t          status;
  SRWLatch        lock;
  int32_t         taskFailed;
  int32_t         taskSucceed;
  int32_t         taskNum;
  int32_t         taskLaunchedNum;
  SHashObj       *flowCtrl;      // key is ep, element is SSchFlowControl
  SArray         *subTasks;      // Element is SQueryTask
D
dapan 已提交
98
} SSchLevel;
D
dapan1121 已提交
99

D
dapan 已提交
100
typedef struct SSchTask {
D
dapan1121 已提交
101
  uint64_t             taskId;         // task id
D
dapan1121 已提交
102
  SRWLatch             lock;           // task lock
D
dapan1121 已提交
103 104 105 106 107
  SSchLevel           *level;          // level
  SSubplan            *plan;           // subplan
  char                *msg;            // operator tree
  int32_t              msgLen;         // msg length
  int8_t               status;         // task status
D
dapan1121 已提交
108 109
  int32_t              lastMsgType;    // last sent msg type
  SQueryNodeAddr       succeedAddr;    // task executed success node address
110 111
  int8_t               candidateIdx;   // current try condidation index
  SArray              *candidateAddrs; // condidate node addresses, element is SQueryNodeAddr
D
dapan1121 已提交
112
  SArray              *execAddrs;      // all tried node for current task, element is SQueryNodeAddr
D
dapan1121 已提交
113 114 115 116
  SQueryProfileSummary summary;        // task execution summary
  int32_t              childReady;     // child task ready number
  SArray              *children;       // the datasource tasks,from which to fetch the result, element is SQueryTask*
  SArray              *parents;        // the data destination tasks, get data from current task, element is SQueryTask*
D
dapan1121 已提交
117
  void*                handle;          // task send handle 
D
dapan 已提交
118
} SSchTask;
D
dapan1121 已提交
119

D
dapan 已提交
120
/* Boolean attributes of a job (embedded as SSchJob.attr). */
typedef struct SSchJobAttr {
  bool needFetch;
  bool syncSchedule;
  bool queryJob;      // set by SCH_SET_JOB_TYPE: true unless the type is SUBPLAN_TYPE_MODIFY
  bool needFlowCtrl;  // set by SCH_SET_JOB_NEED_FLOW_CTRL, read by SCH_JOB_NEED_FLOW_CTRL
} SSchJobAttr;
D
dapan1121 已提交
126

D
dapan 已提交
127
typedef struct SSchJob {
D
dapan1121 已提交
128
  int64_t          refId;
129
  uint64_t         queryId;
D
dapan1121 已提交
130
  SSchJobAttr      attr;
131
  int32_t          levelNum;
D
dapan1121 已提交
132 133 134
  void            *transport;
  SArray          *nodeList;   // qnode/vnode list, element is SQueryNodeAddr
  SArray          *levels;    // Element is SQueryLevel, starting from 0. SArray<SSchLevel>
X
Xiaoyu Wang 已提交
135
  SNodeList       *subPlans;  // subplan pointer copied from DAG, no need to free it in scheduler
D
dapan1121 已提交
136

137
  int32_t          levelIdx;
D
dapan1121 已提交
138
  SEpSet           dataSrcEps;
D
dapan1121 已提交
139 140 141 142 143
  SHashObj        *execTasks; // executing tasks, key:taskid, value:SQueryTask*
  SHashObj        *succTasks; // succeed tasks, key:taskid, value:SQueryTask*
  SHashObj        *failTasks; // failed tasks, key:taskid, value:SQueryTask*

  int8_t           status;  
D
dapan1121 已提交
144
  SQueryNodeAddr   resNode;
D
dapan 已提交
145
  tsem_t           rspSem;
D
dapan1121 已提交
146
  int8_t           userFetch;
D
dapan 已提交
147
  int32_t          remoteFetch;
D
dapan1121 已提交
148
  SSchTask        *fetchTask;
D
dapan 已提交
149
  int32_t          errCode;
D
dapan1121 已提交
150
  void            *res;         //TODO free it or not
D
dapan1121 已提交
151
  int32_t          resNumOfRows;
152
  const char      *sql;
153
  SQueryProfileSummary summary;
D
dapan 已提交
154
} SSchJob;
D
dapan1121 已提交
155

D
dapan1121 已提交
156 157
// Global scheduler management state; declared here, defined in a source file not shown in this header.
extern SSchedulerMgmt schMgmt;

D
dapan 已提交
158
// True once `readyNum` has reached the number of entries in the task's `children` array.
#define SCH_TASK_READY_TO_LUNCH(readyNum, task) (taosArrayGetSize((task)->children) <= (readyNum))
D
dapan1121 已提交
159

X
Xiaoyu Wang 已提交
160 161 162
/* Predicates on the subplan type of a task's plan. */
// Scan subplans are the data-source tasks.
#define SCH_IS_DATA_SRC_TASK(task) (SUBPLAN_TYPE_SCAN == (task)->plan->subplanType)
// Both predicates below hold exactly for modify subplans.
#define SCH_TASK_NEED_WAIT_ALL(task) (SUBPLAN_TYPE_MODIFY == (task)->plan->subplanType)
#define SCH_TASK_NO_NEED_DROP(task) (SUBPLAN_TYPE_MODIFY == (task)->plan->subplanType)
163

H
Haojun Liao 已提交
164
/*
 * Status accessors. The `status` field of a task or job is always read and
 * written through the 8-bit atomic helpers atomic_load_8 / atomic_store_8.
 */
#define SCH_SET_TASK_STATUS(task, st) atomic_store_8(&(task)->status, (st))
#define SCH_GET_TASK_STATUS(task) atomic_load_8(&(task)->status)

#define SCH_SET_JOB_STATUS(job, st) atomic_store_8(&(job)->status, (st))
#define SCH_GET_JOB_STATUS(job) atomic_load_8(&(job)->status)

D
dapan1121 已提交
170 171 172 173 174 175 176 177 178 179 180
/*
 * Job / level / endpoint helpers. The two setters are wrapped in parentheses so
 * that they stay a single expression wherever they are expanded.
 */
#define SCH_SET_JOB_NEED_FLOW_CTRL(_job) ((_job)->attr.needFlowCtrl = true)
#define SCH_JOB_NEED_FLOW_CTRL(_job) ((_job)->attr.needFlowCtrl)
// A task is flow controlled only when it is a data-source task, its job has flow control
// enabled, it sits in the last level, and that level still has tasks left to launch.
#define SCH_TASK_NEED_FLOW_CTRL(_job, _task)                           \
  (SCH_IS_DATA_SRC_TASK(_task) && SCH_JOB_NEED_FLOW_CTRL(_job) &&      \
   SCH_IS_LEAF_TASK(_job, _task) && SCH_IS_LEVEL_UNFINISHED((_task)->level))

// A job is a query job unless its type is SUBPLAN_TYPE_MODIFY; only query jobs need a fetch.
#define SCH_SET_JOB_TYPE(_job, type) ((_job)->attr.queryJob = ((type) != SUBPLAN_TYPE_MODIFY))
#define SCH_IS_QUERY_JOB(_job) ((_job)->attr.queryJob)
#define SCH_JOB_NEED_FETCH(_job) SCH_IS_QUERY_JOB(_job)
// Leaf tasks live in the level whose index is levelNum - 1.
#define SCH_IS_LEAF_TASK(_job, _task) (((_task)->level->level + 1) == (_job)->levelNum)
#define SCH_IS_LEVEL_UNFINISHED(_level) ((_level)->taskLaunchedNum < (_level)->taskNum)
// Pointer to the endpoint currently selected by epset.inUse.
#define SCH_GET_CUR_EP(_addr) (&(_addr)->epset.eps[(_addr)->epset.inUse])
// Advance epset.inUse to the next endpoint, wrapping around; epset.numOfEps must be non-zero (modulo).
#define SCH_SWITCH_EPSET(_addr) ((_addr)->epset.inUse = ((_addr)->epset.inUse + 1) % (_addr)->epset.numOfEps)
D
dapan1121 已提交
181

H
Haojun Liao 已提交
182 183
/*
 * Logging helpers built on qError / qDebug / qWarn.
 *
 * They read `pJob` (and, for the task variants, `pTask`) from the scope in
 * which they are expanded, so those names must exist at the call site.
 * `param` must be a string literal because it is concatenated onto the prefix,
 * and at least one variadic argument must be supplied (an empty __VA_ARGS__
 * would leave a trailing comma).
 */
#define SCH_JOB_ELOG(param, ...) qError("QID:0x%" PRIx64 " " param, pJob->queryId, __VA_ARGS__)
#define SCH_JOB_DLOG(param, ...) qDebug("QID:0x%" PRIx64 " " param, pJob->queryId, __VA_ARGS__)

#define SCH_TASK_ELOG(param, ...) \
  qError("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, pTask->taskId, __VA_ARGS__)
#define SCH_TASK_DLOG(param, ...) \
  qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, pTask->taskId, __VA_ARGS__)
#define SCH_TASK_WLOG(param, ...) \
  qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, pTask->taskId, __VA_ARGS__)
D
dapan1121 已提交
191 192 193 194

/*
 * Error-propagation helpers. Each evaluates `c` exactly once and mirrors a
 * non-success code into `terrno`.
 */
// Return from the enclosing function only when `c` is a failure code.
#define SCH_ERR_RET(c)                \
  do {                                \
    int32_t _code = (c);              \
    if (_code != TSDB_CODE_SUCCESS) { \
      terrno = _code;                 \
      return _code;                   \
    }                                 \
  } while (0)
// Always return `c` from the enclosing function.
#define SCH_RET(c)                    \
  do {                                \
    int32_t _code = (c);              \
    if (_code != TSDB_CODE_SUCCESS) { \
      terrno = _code;                 \
    }                                 \
    return _code;                     \
  } while (0)
// Store `c` in the caller's `code` variable and jump to the caller's `_return` label on failure.
#define SCH_ERR_JRET(c)              \
  do {                               \
    code = (c);                      \
    if (code != TSDB_CODE_SUCCESS) { \
      terrno = code;                 \
      goto _return;                  \
    }                                \
  } while (0)
195

D
dapan1121 已提交
196 197 198
/*
 * Latch helpers: `type` selects the read or the write variant of the latch
 * call. Any value other than SCH_READ takes the write path.
 */
#define SCH_LOCK(type, _lock) (SCH_READ != (type) ? taosWLockLatch(_lock) : taosRLockLatch(_lock))
#define SCH_UNLOCK(type, _lock) (SCH_READ != (type) ? taosWUnLockLatch(_lock) : taosRUnLockLatch(_lock))

199

D
dapan1121 已提交
200 201
// Launch `task` of `job`. NOTE(review): implementation not visible here; the int32_t result is presumably a TSDB error code -- confirm.
int32_t schLaunchTask(SSchJob *job, SSchTask *task);
// NOTE(review): by its name, builds a message of `msgType` for `task` and sends it to `addr` -- confirm in the implementation.
int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, SQueryNodeAddr *addr, int32_t msgType);
D
dapan1121 已提交
202 203
// Look up the job identified by `refId`. NOTE(review): presumably takes a reference that must be paired with schReleaseJob, and may return NULL -- confirm.
SSchJob *schAcquireJob(int64_t refId);
// Counterpart of schAcquireJob for the same `refId`. NOTE(review): return-value semantics not visible here.
int32_t schReleaseJob(int64_t refId);
D
dapan1121 已提交
204 205 206 207 208 209 210
// Flow-control and launch entry points. Implementations are not visible in this header;
// the notes below are inferred from names and signatures only -- confirm before relying on them.
// NOTE(review): presumably releases pLevel->flowCtrl.
void schFreeFlowCtrl(SSchLevel *pLevel);
// NOTE(review): presumably decides whether pJob needs flow control based on pLevel.
int32_t schCheckJobNeedFlowCtrl(SSchJob *pJob, SSchLevel *pLevel);
// NOTE(review): presumably gives back the flow quota held by pTask.
int32_t schDecTaskFlowQuota(SSchJob *pJob, SSchTask *pTask);
// NOTE(review): presumably tries to reserve flow quota for pTask; `enough` is an out-parameter.
int32_t schCheckIncTaskFlowQuota(SSchJob *pJob, SSchTask *pTask, bool *enough);
int32_t schLaunchTasksInFlowCtrlList(SSchJob *pJob, SSchTask *pTask);
int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask *pTask);
int32_t schFetchFromRemote(SSchJob *pJob);
D
dapan1121 已提交
211
// Failure handler for pTask of pJob with error `errCode`. NOTE(review): retry/abort behavior is not visible in this header.
int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode);
D
dapan1121 已提交
212

D
dapan 已提交
213

H
refact  
Hongze Cheng 已提交
214 215 216 217
#ifdef __cplusplus
}
#endif

D
dapan1121 已提交
218
#endif /*_TD_SCHEDULER_INT_H_*/