scheduler.c 7.1 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14 15
 */

L
Liu Jicong 已提交
16
#include "catalog.h"
H
Hongze Cheng 已提交
17
#include "command.h"
L
Liu Jicong 已提交
18
#include "query.h"
D
dapan1121 已提交
19
#include "schedulerInt.h"
H
Hongze Cheng 已提交
20
#include "tmsg.h"
D
dapan1121 已提交
21
#include "tref.h"
D
dapan1121 已提交
22
#include "trpc.h"
23

D
dapan1121 已提交
24
SSchedulerMgmt schMgmt = {
25
    .jobRef = -1,
D
dapan1121 已提交
26
};
D
dapan1121 已提交
27

D
dapan1121 已提交
28
int32_t schedulerInit(SSchedulerCfg *cfg) {
D
dapan1121 已提交
29
  if (schMgmt.jobRef >= 0) {
D
dapan1121 已提交
30 31 32 33 34 35
    qError("scheduler already initialized");
    return TSDB_CODE_QRY_INVALID_INPUT;
  }

  if (cfg) {
    schMgmt.cfg = *cfg;
L
Liu Jicong 已提交
36

D
dapan1121 已提交
37
    if (schMgmt.cfg.maxJobNum == 0) {
D
dapan1121 已提交
38
      schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_MAX_JOB_NUM;
D
dapan1121 已提交
39
    }
D
dapan1121 已提交
40 41 42
    if (schMgmt.cfg.maxNodeTableNum <= 0) {
      schMgmt.cfg.maxNodeTableNum = SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM;
    }
D
dapan1121 已提交
43
  } else {
D
dapan1121 已提交
44 45
    schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_MAX_JOB_NUM;
    schMgmt.cfg.maxNodeTableNum = SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM;
D
dapan1121 已提交
46
  }
L
Liu Jicong 已提交
47

D
dapan1121 已提交
48 49
  schMgmt.jobRef = taosOpenRef(schMgmt.cfg.maxJobNum, schFreeJobImpl);
  if (schMgmt.jobRef < 0) {
D
dapan1121 已提交
50 51 52 53 54 55 56
    qError("init schduler jobRef failed, num:%u", schMgmt.cfg.maxJobNum);
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  schMgmt.hbConnections = taosHashInit(100, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
  if (NULL == schMgmt.hbConnections) {
    qError("taosHashInit hb connections failed");
D
dapan1121 已提交
57 58 59
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

D
dapan1121 已提交
60
  if (taosGetSystemUUID((char *)&schMgmt.sId, sizeof(schMgmt.sId))) {
D
dapan1121 已提交
61 62 63 64
    qError("generate schdulerId failed, errno:%d", errno);
    SCH_ERR_RET(TSDB_CODE_QRY_SYS_ERROR);
  }

D
dapan1121 已提交
65
  qInfo("scheduler 0x%" PRIx64 " initizlized, maxJob:%u", schMgmt.sId, schMgmt.cfg.maxJobNum);
L
Liu Jicong 已提交
66

D
dapan1121 已提交
67 68 69
  return TSDB_CODE_SUCCESS;
}

D
dapan1121 已提交
70
int32_t schedulerExecJob(SSchedulerReq *pReq, int64_t *pJobId, SQueryResult *pRes) {
D
dapan1121 已提交
71
  qDebug("scheduler sync exec job start");
D
dapan1121 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84 85

  int32_t code = 0;  
  SSchJob *pJob = NULL;
  SCH_ERR_JRET(schInitJob(pReq, &pJob));

  *pJobId = pJob->refId;
  
  SCH_ERR_JRET(schExecJobImpl(pReq, pJob, true));

_return:

  if (code && NULL == pJob) {
    qDestroyQueryPlan(pReq->pDag);
  }
D
dapan1121 已提交
86
  
D
dapan1121 已提交
87 88 89
  if (pJob) {
    schSetJobQueryRes(pJob, pRes);
    schReleaseJob(pJob->refId);
D
dapan1121 已提交
90 91
  }

D
dapan1121 已提交
92
  return code;
D
dapan1121 已提交
93 94
}

D
dapan1121 已提交
95
int32_t schedulerAsyncExecJob(SSchedulerReq *pReq, int64_t *pJobId) {
D
dapan1121 已提交
96 97
  qDebug("scheduler async exec job start");

D
dapan1121 已提交
98 99 100 101 102
  int32_t code = 0;  
  SSchJob *pJob = NULL;
  SCH_ERR_JRET(schInitJob(pReq, &pJob));

  *pJobId = pJob->refId;
D
dapan1121 已提交
103
  
D
dapan1121 已提交
104
  SCH_ERR_JRET(schExecJobImpl(pReq, pJob, false));
D
dapan1121 已提交
105 106

_return:
107

D
dapan1121 已提交
108 109 110 111 112 113 114 115 116
  if (code && NULL == pJob) {
    qDestroyQueryPlan(pReq->pDag);
  }
  
  if (pJob) {
    schReleaseJob(pJob->refId);
  }

  return code;
D
dapan1121 已提交
117 118
}

L
Liu Jicong 已提交
119
int32_t schedulerFetchRows(int64_t job, void **pData) {
D
dapan1121 已提交
120 121
  qDebug("scheduler sync fetch rows start");

D
dapan1121 已提交
122
  if (NULL == pData) {
D
dapan1121 已提交
123
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
D
dapan 已提交
124 125
  }

L
Liu Jicong 已提交
126
  int32_t  code = 0;
D
dapan1121 已提交
127
  SSchJob *pJob = schAcquireJob(job);
D
dapan1121 已提交
128
  if (NULL == pJob) {
129
    qError("acquire job from jobRef list failed, may be dropped, jobId:0x%" PRIx64, job);
D
dapan1121 已提交
130 131
    SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
  }
D
dapan1121 已提交
132

D
dapan1121 已提交
133 134
  SCH_ERR_RET(schBeginOperation(pJob, SCH_OP_FETCH, true));

D
dapan1121 已提交
135 136
  pJob->userRes.fetchRes = pData;
  code = schFetchRows(pJob);
D
dapan 已提交
137

D
dapan1121 已提交
138
  schReleaseJob(job);
D
dapan 已提交
139

D
dapan1121 已提交
140 141
  SCH_RET(code);
}
D
dapan1121 已提交
142

D
dapan1121 已提交
143
void schedulerAsyncFetchRows(int64_t job, schedulerFetchFp fp, void* param) {
D
dapan1121 已提交
144 145
  qDebug("scheduler async fetch rows start");

D
dapan1121 已提交
146
  int32_t code = 0;
D
dapan1121 已提交
147
  if (NULL == fp || NULL == param) {
D
dapan1121 已提交
148
    SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT);
D
dapan1121 已提交
149
  }
D
dapan 已提交
150

D
dapan1121 已提交
151 152
  SSchJob *pJob = schAcquireJob(job);
  if (NULL == pJob) {
D
dapan1121 已提交
153 154
    qError("acquire sch job from job list failed, may be dropped, jobId:0x%" PRIx64, job);
    SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR);
D
dapan1121 已提交
155
  }
D
dapan1121 已提交
156

D
dapan1121 已提交
157 158
  SCH_ERR_JRET(schBeginOperation(pJob, SCH_OP_FETCH, false));
  
D
dapan1121 已提交
159 160 161
  pJob->userRes.fetchFp = fp;
  pJob->userRes.userParam = param;
  
D
dapan1121 已提交
162 163 164
  SCH_ERR_JRET(schAsyncFetchRows(pJob));

_return:
L
Liu Jicong 已提交
165

D
dapan1121 已提交
166 167 168 169
  if (code) {
    fp(NULL, param, code);
  }
  
D
dapan1121 已提交
170
  schReleaseJob(job);
D
dapan 已提交
171
}
D
dapan1121 已提交
172

D
dapan1121 已提交
173 174 175 176
int32_t schedulerGetTasksStatus(int64_t job, SArray *pSub) {
  int32_t  code = 0;
  SSchJob *pJob = schAcquireJob(job);
  if (NULL == pJob) {
D
dapan1121 已提交
177
    qDebug("acquire job from jobRef list failed, may not started or dropped, refId:0x%" PRIx64, job);
D
dapan1121 已提交
178 179 180 181
    SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
  }

  if (pJob->status < JOB_TASK_STATUS_NOT_START || pJob->levelNum <= 0 || NULL == pJob->levels) {
D
dapan1121 已提交
182
    qDebug("job not initialized or not executable job, refId:0x%" PRIx64, job);
D
dapan1121 已提交
183
    SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR);
D
dapan1121 已提交
184 185 186 187
  }

  for (int32_t i = pJob->levelNum - 1; i >= 0; --i) {
    SSchLevel *pLevel = taosArrayGet(pJob->levels, i);
H
Hongze Cheng 已提交
188

D
dapan1121 已提交
189
    for (int32_t m = 0; m < pLevel->taskNum; ++m) {
X
Xiaoyu Wang 已提交
190
      SSchTask     *pTask = taosArrayGet(pLevel->subTasks, m);
191 192 193
      SQuerySubDesc subDesc = {0};
      subDesc.tid = pTask->taskId;
      strcpy(subDesc.status, jobTaskStatusStr(pTask->status));
H
Hongze Cheng 已提交
194

D
dapan1121 已提交
195 196 197 198
      taosArrayPush(pSub, &subDesc);
    }
  }

D
dapan1121 已提交
199 200 201 202 203
_return:

  schReleaseJob(job);

  SCH_RET(code);
D
dapan1121 已提交
204 205
}

D
dapan1121 已提交
206
int32_t scheduleCancelJob(int64_t job) {
D
dapan1121 已提交
207
  SSchJob *pJob = schAcquireJob(job);
D
dapan1121 已提交
208
  if (NULL == pJob) {
209
    qError("acquire job from jobRef list failed, may be dropped, jobId:0x%" PRIx64, job);
D
dapan1121 已提交
210 211
    SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
  }
D
dapan1121 已提交
212

D
dapan1121 已提交
213 214
  int32_t code = schCancelJob(pJob);

D
dapan1121 已提交
215
  schReleaseJob(job);
D
dapan1121 已提交
216 217

  SCH_RET(code);
D
dapan1121 已提交
218 219
}

D
dapan1121 已提交
220 221 222 223 224 225 226 227
void schedulerStopQueryHb(void *pTrans) {
  if (NULL == pTrans) {
    return;
  }

  schCleanClusterHb(pTrans);
}

D
dapan1121 已提交
228 229 230 231 232 233
void schedulerFreeJob(int64_t* job, int32_t errCode) {
  if (0 == *job) {
    return;
  }
  
  SSchJob *pJob = schAcquireJob(*job);
D
dapan1121 已提交
234
  if (NULL == pJob) {
D
dapan1121 已提交
235 236
    qError("acquire sch job failed, may be dropped, jobId:0x%" PRIx64, *job);
    *job = 0;
D
dapan 已提交
237 238
    return;
  }
D
dapan1121 已提交
239

D
dapan1121 已提交
240 241
  int32_t code = schProcessOnJobDropped(pJob, errCode);
  if (TSDB_CODE_SCH_JOB_IS_DROPPING == code) {
D
dapan1121 已提交
242 243
    SCH_JOB_DLOG("sch job is already dropping, refId:0x%" PRIx64, *job);
    *job = 0;
D
dapan1121 已提交
244
    return;
D
dapan1121 已提交
245
  }
D
dapan1121 已提交
246

D
dapan1121 已提交
247
  SCH_JOB_DLOG("start to remove job from jobRef list, refId:0x%" PRIx64, *job);
248

D
dapan1121 已提交
249 250
  if (taosRemoveRef(schMgmt.jobRef, *job)) {
    SCH_JOB_ELOG("remove job from job list failed, refId:0x%" PRIx64, *job);
251
  }
D
dapan1121 已提交
252

D
dapan1121 已提交
253 254
  schReleaseJob(*job);
  *job = 0;
D
dapan1121 已提交
255
}
D
dapan1121 已提交
256

D
dapan1121 已提交
257
void schedulerDestroy(void) {
258 259
  atomic_store_8((int8_t *)&schMgmt.exit, 1);

D
dapan1121 已提交
260
  if (schMgmt.jobRef >= 0) {
D
dapan1121 已提交
261
    SSchJob *pJob = taosIterateRef(schMgmt.jobRef, 0);
H
Hongze Cheng 已提交
262
    int64_t  refId = 0;
C
Cary Xu 已提交
263

D
dapan1121 已提交
264
    while (pJob) {
D
dapan1121 已提交
265
      refId = pJob->refId;
C
Cary Xu 已提交
266 267 268
      if (refId == 0) {
        break;
      }
D
dapan1121 已提交
269
      taosRemoveRef(schMgmt.jobRef, pJob->refId);
L
Liu Jicong 已提交
270

D
dapan1121 已提交
271
      pJob = taosIterateRef(schMgmt.jobRef, refId);
D
dapan1121 已提交
272
    }
D
dapan1121 已提交
273
  }
D
dapan1121 已提交
274

D
dapan1121 已提交
275
  SCH_LOCK(SCH_WRITE, &schMgmt.hbLock);
D
dapan1121 已提交
276
  if (schMgmt.hbConnections) {
H
Hongze Cheng 已提交
277
    void *pIter = taosHashIterate(schMgmt.hbConnections, NULL);
D
dapan1121 已提交
278 279 280 281
    while (pIter != NULL) {
      SSchHbTrans *hb = pIter;
      schFreeRpcCtx(&hb->rpcCtx);
      pIter = taosHashIterate(schMgmt.hbConnections, pIter);
H
Hongze Cheng 已提交
282
    }
D
dapan1121 已提交
283 284 285
    taosHashCleanup(schMgmt.hbConnections);
    schMgmt.hbConnections = NULL;
  }
D
dapan1121 已提交
286
  SCH_UNLOCK(SCH_WRITE, &schMgmt.hbLock);
D
dapan1121 已提交
287
}