/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "catalog.h"
#include "command.h"
#include "query.h"
#include "schedulerInt.h"
#include "tmsg.h"
#include "tref.h"
#include "trpc.h"

SSchedulerMgmt schMgmt = {
25
    .jobRef = -1,
D
dapan1121 已提交
26
};
D
dapan1121 已提交
27

D
dapan1121 已提交
28
/**
 * Initialize the process-wide scheduler state held in schMgmt.
 *
 * Copies *cfg (when given) into schMgmt.cfg, substituting
 * SCHEDULE_DEFAULT_MAX_JOB_NUM / SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM for unset
 * fields, then opens the job reference set, creates the hbConnections hash and
 * generates the scheduler id (schMgmt.sId).
 *
 * If the hash or the id cannot be created, everything created so far is
 * released and schMgmt.jobRef is reset to -1, so nothing leaks and a later
 * call can retry instead of being rejected as "already initialized".
 *
 * @param cfg optional configuration; NULL selects the default for every field.
 * @return TSDB_CODE_SUCCESS on success;
 *         TSDB_CODE_QRY_INVALID_INPUT if the scheduler is already initialized;
 *         TSDB_CODE_QRY_OUT_OF_MEMORY if the ref set or the hash cannot be created;
 *         TSDB_CODE_QRY_SYS_ERROR if the scheduler id cannot be generated.
 */
int32_t schedulerInit(SSchedulerCfg *cfg) {
  int32_t code = TSDB_CODE_SUCCESS;

  if (schMgmt.jobRef >= 0) {
    qError("scheduler already initialized");
    return TSDB_CODE_QRY_INVALID_INPUT;
  }

  if (cfg) {
    schMgmt.cfg = *cfg;

    /* Fall back to the defaults for fields the caller left unset. */
    if (schMgmt.cfg.maxJobNum == 0) {
      schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_MAX_JOB_NUM;
    }
    if (schMgmt.cfg.maxNodeTableNum <= 0) {
      schMgmt.cfg.maxNodeTableNum = SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM;
    }
  } else {
    schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_MAX_JOB_NUM;
    schMgmt.cfg.maxNodeTableNum = SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM;
  }

  schMgmt.jobRef = taosOpenRef(schMgmt.cfg.maxJobNum, schFreeJobImpl);
  if (schMgmt.jobRef < 0) {
    /* Nothing has been created yet, so there is nothing to roll back. */
    qError("init schduler jobRef failed, num:%u", schMgmt.cfg.maxJobNum);
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  schMgmt.hbConnections = taosHashInit(100, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
  if (NULL == schMgmt.hbConnections) {
    qError("taosHashInit hb connections failed");
    SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  if (taosGetSystemUUID((char *)&schMgmt.sId, sizeof(schMgmt.sId))) {
    /* Log before the rollback so errno still belongs to the failing call. */
    qError("generate schdulerId failed, errno:%d", errno);
    SCH_ERR_JRET(TSDB_CODE_QRY_SYS_ERROR);
  }

  qInfo("scheduler %" PRIx64 " initizlized, maxJob:%u", schMgmt.sId, schMgmt.cfg.maxJobNum);

  return TSDB_CODE_SUCCESS;

_return:

  /* Error path only: undo the partial initialization. */
  if (schMgmt.hbConnections) {
    taosHashCleanup(schMgmt.hbConnections);
    schMgmt.hbConnections = NULL;
  }

  taosCloseRef(schMgmt.jobRef);
  schMgmt.jobRef = -1;

  SCH_RET(code);
}

L
Liu Jicong 已提交
70
/**
 * Run a query plan and copy the resulting job's outcome into *pRes.
 *
 * Dispatches to schExecStaticExplain() when the plan is in static-explain mode
 * and to schExecJobImpl() otherwise, passing `true` as the final argument in
 * both cases (schedulerAsyncExecJob passes `false`).
 *
 * @param transport handle forwarded to the execution routines; must not be NULL.
 * @param nodeList  node list forwarded to the execution routines.
 * @param pDag      query plan; pDag and pDag->pSubplans must not be NULL.
 * @param pJob      out: job reference id; reset to 0 before execution starts.
 * @param sql       SQL text forwarded to the execution routines.
 * @param startTs   start timestamp forwarded to schExecJobImpl().
 * @param pRes      out: receives the job's errCode, resNumOfRows and queryRes.
 *                  The job's queryRes pointer is cleared after being handed over.
 * @return the execution code; TSDB_CODE_QRY_INVALID_INPUT for NULL arguments;
 *         TSDB_CODE_SCH_STATUS_ERROR when execution reported success but the
 *         job can no longer be acquired to read its result.
 */
int32_t schedulerExecJob(void *transport, SArray *nodeList, SQueryPlan *pDag, int64_t *pJob, const char *sql,
                         int64_t startTs, SQueryResult *pRes) {
  if (NULL == transport || NULL == pDag || NULL == pDag->pSubplans || NULL == pJob || NULL == pRes) {
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
  }

  int32_t code = 0;

  *pJob = 0;

  if (EXPLAIN_MODE_STATIC == pDag->explainInfo.mode) {
    /* NOTE(review): a failure here returns directly and skips the pRes fill-in below,
     * unlike the schExecJobImpl() branch - confirm this asymmetry is intended. */
    SCH_ERR_RET(schExecStaticExplain(transport, nodeList, pDag, pJob, sql, true));
  } else {
    SCH_ERR_JRET(schExecJobImpl(transport, nodeList, pDag, pJob, sql, startTs, true));
  }

_return:

  if (*pJob) {
    SSchJob *job = schAcquireJob(*pJob);
    if (NULL == job) {
      /* The job may already be gone from the ref list; do not dereference it. */
      qError("acquire job from jobRef list failed, may be dropped, refId:%" PRIx64, *pJob);
      return (TSDB_CODE_SUCCESS != code) ? code : TSDB_CODE_SCH_STATUS_ERROR;
    }

    pRes->code = atomic_load_32(&job->errCode);
    pRes->numOfRows = job->resNumOfRows;
    /* Hand the result pointer to the caller and detach it from the job. */
    pRes->res = job->queryRes;
    job->queryRes = NULL;

    schReleaseJob(*pJob);
  }

  return code;
}

L
Liu Jicong 已提交
102
/**
 * Start a query plan without collecting its result.
 *
 * Static-explain plans go to schExecStaticExplain(); all others go to
 * schExecJobImpl() with a start timestamp of 0. Both are called with `false`
 * as the final argument (schedulerExecJob passes `true`).
 *
 * @param transport handle forwarded to the execution routines; must not be NULL.
 * @param pNodeList node list forwarded to the execution routines.
 * @param pDag      query plan; pDag and pDag->pSubplans must not be NULL.
 * @param sql       SQL text forwarded to the execution routines.
 * @param pJob      out: job reference id; must not be NULL.
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_INVALID_INPUT for NULL arguments, or
 *         the code reported by the execution routine.
 */
int32_t schedulerAsyncExecJob(void *transport, SArray *pNodeList, SQueryPlan *pDag, const char *sql, int64_t *pJob) {
  if (!transport || !pDag || !pDag->pSubplans || !pJob) {
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
  }

  if (EXPLAIN_MODE_STATIC != pDag->explainInfo.mode) {
    SCH_ERR_RET(schExecJobImpl(transport, pNodeList, pDag, pJob, sql, 0, false));
    return TSDB_CODE_SUCCESS;
  }

  SCH_ERR_RET(schExecStaticExplain(transport, pNodeList, pDag, pJob, sql, false));
  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
116
int32_t schedulerFetchRows(int64_t job, void **pData) {
D
dapan1121 已提交
117
  if (NULL == pData) {
D
dapan1121 已提交
118
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
D
dapan 已提交
119 120
  }

L
Liu Jicong 已提交
121
  int32_t  code = 0;
D
dapan1121 已提交
122
  SSchJob *pJob = schAcquireJob(job);
D
dapan1121 已提交
123 124 125 126
  if (NULL == pJob) {
    qError("acquire job from jobRef list failed, may be dropped, refId:%" PRIx64, job);
    SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
  }
D
dapan1121 已提交
127

D
dapan1121 已提交
128 129
  int8_t status = SCH_GET_JOB_STATUS(pJob);
  if (status == JOB_TASK_STATUS_DROPPING) {
D
dapan1121 已提交
130
    SCH_JOB_ELOG("job is dropping, status:%s", jobTaskStatusStr(status));
D
dapan1121 已提交
131
    schReleaseJob(job);
D
dapan1121 已提交
132
    SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
D
dapan1121 已提交
133 134
  }

D
dapan1121 已提交
135
  if (!SCH_JOB_NEED_FETCH(pJob)) {
D
dapan1121 已提交
136
    SCH_JOB_ELOG("no need to fetch data, status:%s", SCH_GET_JOB_STATUS_STR(pJob));
D
dapan1121 已提交
137
    schReleaseJob(job);
D
dapan1121 已提交
138
    SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR);
D
dapan1121 已提交
139 140
  }

D
dapan1121 已提交
141 142
  if (atomic_val_compare_exchange_8(&pJob->userFetch, 0, 1) != 0) {
    SCH_JOB_ELOG("prior fetching not finished, userFetch:%d", atomic_load_8(&pJob->userFetch));
D
dapan1121 已提交
143
    schReleaseJob(job);
D
dapan1121 已提交
144
    SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR);
D
dapan 已提交
145 146
  }

D
dapan1121 已提交
147
  if (JOB_TASK_STATUS_FAILED == status || JOB_TASK_STATUS_DROPPING == status) {
D
dapan1121 已提交
148
    SCH_JOB_ELOG("job failed or dropping, status:%s", jobTaskStatusStr(status));
D
dapan1121 已提交
149 150
    SCH_ERR_JRET(atomic_load_32(&pJob->errCode));
  } else if (status == JOB_TASK_STATUS_SUCCEED) {
D
dapan1121 已提交
151
    SCH_JOB_DLOG("job already succeed, status:%s", jobTaskStatusStr(status));
D
dapan1121 已提交
152 153
    goto _return;
  } else if (status == JOB_TASK_STATUS_PARTIAL_SUCCEED) {
D
dapan1121 已提交
154
    if (!(pJob->attr.explainMode == EXPLAIN_MODE_STATIC)) {
D
dapan1121 已提交
155 156
      SCH_ERR_JRET(schFetchFromRemote(pJob));
      tsem_wait(&pJob->rspSem);
H
Hongze Cheng 已提交
157
    }
D
dapan1121 已提交
158 159 160
  } else {
    SCH_JOB_ELOG("job status error for fetch, status:%s", jobTaskStatusStr(status));
    SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR);
D
dapan 已提交
161 162
  }

D
dapan1121 已提交
163
  status = SCH_GET_JOB_STATUS(pJob);
D
dapan 已提交
164

D
dapan1121 已提交
165
  if (JOB_TASK_STATUS_FAILED == status || JOB_TASK_STATUS_DROPPING == status) {
D
dapan1121 已提交
166
    SCH_JOB_ELOG("job failed or dropping, status:%s", jobTaskStatusStr(status));
D
dapan1121 已提交
167
    SCH_ERR_JRET(atomic_load_32(&pJob->errCode));
D
dapan 已提交
168
  }
L
Liu Jicong 已提交
169

D
dapan1121 已提交
170
  if (pJob->resData && ((SRetrieveTableRsp *)pJob->resData)->completed) {
D
dapan1121 已提交
171
    SCH_ERR_JRET(schChkUpdateJobStatus(pJob, JOB_TASK_STATUS_SUCCEED));
D
dapan 已提交
172 173
  }

D
dapan1121 已提交
174
  while (true) {
D
dapan1121 已提交
175 176
    *pData = atomic_load_ptr(&pJob->resData);
    if (*pData != atomic_val_compare_exchange_ptr(&pJob->resData, *pData, NULL)) {
D
dapan1121 已提交
177 178 179 180 181
      continue;
    }

    break;
  }
D
dapan 已提交
182

D
dapan1121 已提交
183
  if (NULL == *pData) {
wafwerar's avatar
wafwerar 已提交
184
    SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)taosMemoryCalloc(1, sizeof(SRetrieveTableRsp));
D
dapan1121 已提交
185 186 187 188 189
    if (rsp) {
      rsp->completed = 1;
    }

    *pData = rsp;
D
dapan1121 已提交
190
    SCH_JOB_DLOG("empty res and set query complete, code:%x", code);
D
dapan1121 已提交
191
  }
D
dapan1121 已提交
192

193
  SCH_JOB_DLOG("fetch done, totalRows:%d, code:%s", pJob->resNumOfRows, tstrerror(code));
D
dapan1121 已提交
194 195 196 197

_return:

  atomic_val_compare_exchange_8(&pJob->userFetch, 1, 0);
L
Liu Jicong 已提交
198

D
dapan1121 已提交
199
  schReleaseJob(job);
D
dapan 已提交
200

D
dapan1121 已提交
201
  SCH_RET(code);
D
dapan 已提交
202
}
D
dapan1121 已提交
203

D
dapan1121 已提交
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
/**
 * Append the id and status of every task of a job to pSub.
 *
 * Levels are visited from the last index down to 0; within a level, tasks are
 * visited in index order. One SQuerySubDesc {tid, status} is pushed per task.
 *
 * The job reference acquired here is released on every exit path after a
 * successful acquire.
 *
 * @param job  job reference id.
 * @param pSub array receiving the SQuerySubDesc entries.
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_SCH_STATUS_ERROR when the job cannot
 *         be acquired or has no levels / is below JOB_TASK_STATUS_NOT_START.
 */
int32_t schedulerGetTasksStatus(int64_t job, SArray *pSub) {
  int32_t  code = 0;
  SSchJob *pJob = schAcquireJob(job);
  if (NULL == pJob) {
    qDebug("acquire job from jobRef list failed, may not started or dropped, refId:%" PRIx64, job);
    SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
  }

  if (pJob->status < JOB_TASK_STATUS_NOT_START || pJob->levelNum <= 0 || NULL == pJob->levels) {
    qDebug("job not initialized or not executable job, refId:%" PRIx64, job);
    /* Jump (not return) so the reference acquired above is released. */
    SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR);
  }

  for (int32_t i = pJob->levelNum - 1; i >= 0; --i) {
    SSchLevel *pLevel = taosArrayGet(pJob->levels, i);

    for (int32_t m = 0; m < pLevel->taskNum; ++m) {
      SSchTask     *pTask = taosArrayGet(pLevel->subTasks, m);
      SQuerySubDesc subDesc = {.tid = pTask->taskId, .status = pTask->status};

      taosArrayPush(pSub, &subDesc);
    }
  }

_return:

  schReleaseJob(job);

  SCH_RET(code);
}

D
dapan1121 已提交
231
int32_t scheduleCancelJob(int64_t job) {
D
dapan1121 已提交
232
  SSchJob *pJob = schAcquireJob(job);
D
dapan1121 已提交
233 234 235 236
  if (NULL == pJob) {
    qError("acquire job from jobRef list failed, may be dropped, refId:%" PRIx64, job);
    SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
  }
D
dapan1121 已提交
237

D
dapan1121 已提交
238 239
  int32_t code = schCancelJob(pJob);

D
dapan1121 已提交
240
  schReleaseJob(job);
D
dapan1121 已提交
241 242

  SCH_RET(code);
D
dapan1121 已提交
243 244
}

D
dapan1121 已提交
245
void schedulerFreeJob(int64_t job) {
D
dapan1121 已提交
246
  SSchJob *pJob = schAcquireJob(job);
D
dapan1121 已提交
247
  if (NULL == pJob) {
D
dapan1121 已提交
248
    qDebug("acquire job from jobRef list failed, may be dropped, refId:%" PRIx64, job);
D
dapan 已提交
249 250
    return;
  }
D
dapan1121 已提交
251

D
dapan1121 已提交
252 253
  if (atomic_load_8(&pJob->userFetch) > 0) {
    schProcessOnJobDropped(pJob, TSDB_CODE_QRY_JOB_FREED);
D
dapan1121 已提交
254
  }
D
dapan1121 已提交
255

D
dapan1121 已提交
256
  SCH_JOB_DLOG("start to remove job from jobRef list, refId:%" PRIx64, job);
257

D
dapan1121 已提交
258 259
  if (taosRemoveRef(schMgmt.jobRef, job)) {
    SCH_JOB_ELOG("remove job from job list failed, refId:%" PRIx64, job);
260
  }
D
dapan1121 已提交
261 262

  schReleaseJob(job);
D
dapan1121 已提交
263
}
D
dapan1121 已提交
264

D
dapan1121 已提交
265
void schedulerDestroy(void) {
266 267
  atomic_store_8((int8_t *)&schMgmt.exit, 1);

D
dapan1121 已提交
268
  if (schMgmt.jobRef >= 0) {
D
dapan1121 已提交
269
    SSchJob *pJob = taosIterateRef(schMgmt.jobRef, 0);
H
Hongze Cheng 已提交
270
    int64_t  refId = 0;
C
Cary Xu 已提交
271

D
dapan1121 已提交
272
    while (pJob) {
D
dapan1121 已提交
273
      refId = pJob->refId;
C
Cary Xu 已提交
274 275 276
      if (refId == 0) {
        break;
      }
D
dapan1121 已提交
277
      taosRemoveRef(schMgmt.jobRef, pJob->refId);
L
Liu Jicong 已提交
278

D
dapan1121 已提交
279
      pJob = taosIterateRef(schMgmt.jobRef, refId);
D
dapan1121 已提交
280
    }
D
dapan1121 已提交
281
  }
D
dapan1121 已提交
282 283

  if (schMgmt.hbConnections) {
H
Hongze Cheng 已提交
284
    void *pIter = taosHashIterate(schMgmt.hbConnections, NULL);
D
dapan1121 已提交
285 286 287 288
    while (pIter != NULL) {
      SSchHbTrans *hb = pIter;
      schFreeRpcCtx(&hb->rpcCtx);
      pIter = taosHashIterate(schMgmt.hbConnections, pIter);
H
Hongze Cheng 已提交
289
    }
D
dapan1121 已提交
290 291 292
    taosHashCleanup(schMgmt.hbConnections);
    schMgmt.hbConnections = NULL;
  }
D
dapan1121 已提交
293
}