scheduler.c 18.9 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14 15
 */

D
dapan1121 已提交
16 17
#include "schedulerInt.h"
#include "taosmsg.h"
18
#include "query.h"
D
dapan 已提交
19
#include "catalog.h"
20 21

SSchedulerMgmt schMgmt = {0};
D
dapan1121 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53


int32_t schBuildAndSendRequest(void *pRpc, const SEpSet* pMgmtEps, __taos_async_fn_t fp) {
/*
  SRequestObj *pRequest = createRequest(pTscObj, fp, param, TSDB_SQL_CONNECT);
  if (pRequest == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

  SRequestMsgBody body = {0};
  buildConnectMsg(pRequest, &body);

  int64_t transporterId = 0;
  sendMsgToServer(pTscObj->pTransporter, &pTscObj->pAppInfo->mgmtEp.epSet, &body, &transporterId);

  tsem_wait(&pRequest->body.rspSem);
  destroyConnectMsg(&body);

  if (pRequest->code != TSDB_CODE_SUCCESS) {
    const char *errorMsg = (pRequest->code == TSDB_CODE_RPC_FQDN_ERROR) ? taos_errstr(pRequest) : tstrerror(terrno);
    printf("failed to connect to server, reason: %s\n\n", errorMsg);

    destroyRequest(pRequest);
    taos_close(pTscObj);
    pTscObj = NULL;
  } else {
    tscDebug("0x%"PRIx64" connection is opening, connId:%d, dnodeConn:%p", pTscObj->id, pTscObj->connId, pTscObj->pTransporter);
    destroyRequest(pRequest);
  }
*/  
}

D
dapan1121 已提交
54 55 56
int32_t schBuildTaskRalation(SQueryJob *job, SHashObj *planToTask) {
  for (int32_t i = 0; i < job->levelNum; ++i) {
    SQueryLevel *level = taosArrayGet(job->levels, i);
D
dapan 已提交
57
    
D
dapan1121 已提交
58 59 60
    for (int32_t m = 0; m < level->taskNum; ++m) {
      SQueryTask *task = taosArrayGet(level->subTasks, m);
      SSubplan *plan = task->plan;
D
dapan1121 已提交
61 62
      int32_t childNum = plan->pChildern ? (int32_t)taosArrayGetSize(plan->pChildern) : 0;
      int32_t parentNum = plan->pParents ? (int32_t)taosArrayGetSize(plan->pParents) : 0;
D
dapan1121 已提交
63 64

      if (childNum > 0) {
D
dapan 已提交
65 66
        task->children = taosArrayInit(childNum, POINTER_BYTES);
        if (NULL == task->children) {
D
dapan1121 已提交
67 68 69 70 71 72 73 74 75 76 77 78 79
          qError("taosArrayInit %d failed", childNum);
          SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      for (int32_t n = 0; n < childNum; ++n) {
        SSubplan *child = taosArrayGet(plan->pChildern, n);
        SQueryTask *childTask = taosHashGet(planToTask, &child, POINTER_BYTES);
        if (childTask) {
          qError("subplan relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n);
          SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
        }

D
dapan 已提交
80
        if (NULL == taosArrayPush(task->children, &childTask)) {
D
dapan1121 已提交
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
          qError("taosArrayPush failed");
          SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      if (parentNum > 0) {
        task->parents = taosArrayInit(parentNum, POINTER_BYTES);
        if (NULL == task->parents) {
          qError("taosArrayInit %d failed", parentNum);
          SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      for (int32_t n = 0; n < parentNum; ++n) {
        SSubplan *parent = taosArrayGet(plan->pParents, n);
        SQueryTask *parentTask = taosHashGet(planToTask, &parent, POINTER_BYTES);
        if (parentTask) {
          qError("subplan relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n);
          SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
        }

        if (NULL == taosArrayPush(task->parents, &parentTask)) {
          qError("taosArrayPush failed");
          SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }      
    }
  }

D
dapan 已提交
110 111 112 113 114 115 116 117 118 119 120 121 122
  SQueryLevel *level = taosArrayGet(job->levels, 0);
  if (level->taskNum > 1) {
    qError("invalid plan info, level 0, taskNum:%d", level->taskNum);
    SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
  }

  SQueryTask *task = taosArrayGet(level->subTasks, 0);
  if (task->parents && taosArrayGetSize(task->parents) > 0) {
    qError("invalid plan info, level 0, parentNum:%d", (int32_t)taosArrayGetSize(task->parents));
    SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
  }


D
dapan1121 已提交
123 124 125 126
  return TSDB_CODE_SUCCESS;
}


127
int32_t schValidateAndBuildJob(SQueryDag *dag, SQueryJob *job) {
D
dapan1121 已提交
128
  int32_t code = 0;
D
dapan 已提交
129

D
dapan1121 已提交
130 131
  job->queryId = dag->queryId;
  
D
dapan 已提交
132 133 134 135 136
  if (dag->numOfSubplans <= 0) {
    qError("invalid subplan num:%d", dag->numOfSubplans);
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
  }
  
137 138 139
  int32_t levelNum = (int32_t)taosArrayGetSize(dag->pSubplans);
  if (levelNum <= 0) {
    qError("invalid level num:%d", levelNum);
D
dapan1121 已提交
140
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
141 142
  }

D
dapan1121 已提交
143 144 145 146 147 148
  SHashObj *planToTask = taosHashInit(SCHEDULE_DEFAULT_TASK_NUMBER, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  if (NULL == planToTask) {
    qError("taosHashInit %d failed", SCHEDULE_DEFAULT_TASK_NUMBER);
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
  
149 150 151
  job->levels = taosArrayInit(levelNum, sizeof(SQueryLevel));
  if (NULL == job->levels) {
    qError("taosArrayInit %d failed", levelNum);
D
dapan1121 已提交
152
    SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
153 154 155 156 157 158 159 160 161 162 163
  }

  job->levelNum = levelNum;
  job->levelIdx = levelNum - 1;

  job->subPlans = dag->pSubplans;

  SQueryLevel level = {0};
  SArray *levelPlans = NULL;
  int32_t levelPlanNum = 0;

D
dapan1121 已提交
164
  level.status = JOB_TASK_STATUS_NOT_START;
D
dapan1121 已提交
165

166
  for (int32_t i = 0; i < levelNum; ++i) {
D
dapan1121 已提交
167
    level.level = i;
168 169 170
    levelPlans = taosArrayGetP(dag->pSubplans, i);
    if (NULL == levelPlans) {
      qError("no level plans for level %d", i);
D
dapan1121 已提交
171
      SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT);
172 173 174 175 176
    }

    levelPlanNum = (int32_t)taosArrayGetSize(levelPlans);
    if (levelPlanNum <= 0) {
      qError("invalid level plans number:%d, level:%d", levelPlanNum, i);
D
dapan1121 已提交
177
      SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT);
D
dapan1121 已提交
178 179 180 181 182 183 184
    }

    level.taskNum = levelPlanNum;
    
    level.subTasks = taosArrayInit(levelPlanNum, sizeof(SQueryTask));
    if (NULL == level.subTasks) {
      qError("taosArrayInit %d failed", levelPlanNum);
D
dapan1121 已提交
185
      SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
186 187 188
    }
    
    for (int32_t n = 0; n < levelPlanNum; ++n) {
D
dapan1121 已提交
189
      SSubplan *plan = taosArrayGet(levelPlans, n);
D
dapan1121 已提交
190
      SQueryTask task = {0};
D
dapan1121 已提交
191
      
D
dapan1121 已提交
192 193
      task.taskId = atomic_add_fetch_64(&schMgmt.taskId, 1);
      task.plan = plan;
194
      task.status = JOB_TASK_STATUS_NOT_START;
D
dapan1121 已提交
195

D
dapan1121 已提交
196 197 198 199 200 201 202
      void *p = taosArrayPush(level.subTasks, &task);
      if (NULL == p) {
        qError("taosArrayPush failed");
        SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }
      
      if (0 != taosHashPut(planToTask, &plan, POINTER_BYTES, &p, POINTER_BYTES)) {
D
dapan1121 已提交
203 204 205
        qError("taosHashPut failed");
        SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }
D
dapan1121 已提交
206
    }
207

D
dapan1121 已提交
208 209 210
    if (NULL == taosArrayPush(job->levels, &level)) {
      qError("taosArrayPush failed");
      SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
211 212 213
    }
  }

D
dapan1121 已提交
214 215 216 217 218 219
  SCH_ERR_JRET(schBuildTaskRalation(job, planToTask));

  if (planToTask) {
    taosHashCleanup(planToTask);
  }
  
220
  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
221 222 223 224 225 226

_return:
  if (level.subTasks) {
    taosArrayDestroy(level.subTasks);
  }

D
dapan1121 已提交
227 228 229 230
  if (planToTask) {
    taosHashCleanup(planToTask);
  }

D
dapan1121 已提交
231
  SCH_RET(code);
232 233
}

D
dapan1121 已提交
234
int32_t schSetTaskExecEpSet(SQueryJob *job, SEpSet *epSet) {  
D
dapan 已提交
235
  if (epSet->numOfEps >= SCH_MAX_CONDIDATE_EP_NUM) {
D
dapan 已提交
236 237 238
    return TSDB_CODE_SUCCESS;
  }

D
dapan1121 已提交
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
  int32_t qnodeNum = taosArrayGetSize(job->qnodeList);
  
  for (int32_t i = 0; i < qnodeNum && epSet->numOfEps < tListLen(epSet->port); ++i) {
    SEpAddr *addr = taosArrayGet(job->qnodeList, i);
    
    strncpy(epSet->fqdn[epSet->numOfEps], addr->fqdn, sizeof(addr->fqdn));
    epSet->port[epSet->numOfEps] = addr->port;
    
    ++epSet->numOfEps;
  }

  for (int32_t i = 0; i < job->dataSrcEps.numOfEps && epSet->numOfEps < tListLen(epSet->port); ++i) {
    strncpy(epSet->fqdn[epSet->numOfEps], job->dataSrcEps.fqdn[i], sizeof(job->dataSrcEps.fqdn[i]));
    epSet->port[epSet->numOfEps] = job->dataSrcEps.port[i];
    
    ++epSet->numOfEps;
D
dapan 已提交
255
  }
D
dapan 已提交
256 257

  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
258
}
D
dapan1121 已提交
259

D
dapan1121 已提交
260

D
dapan 已提交
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
int32_t schPushTaskToExecList(SQueryJob *job, SQueryTask *task) {
  if (0 != taosHashPut(job->execTasks, &task->taskId, sizeof(task->taskId), &task, POINTER_BYTES)) {
    qError("taosHashPut failed");
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  return TSDB_CODE_SUCCESS;
}

int32_t schMoveTaskToSuccList(SQueryJob *job, SQueryTask *task, bool *moved) {
  if (0 != taosHashRemove(job->execTasks, &task->taskId, sizeof(task->taskId))) {
    qWarn("remove task[%"PRIx64"] from execTasks failed", task->taskId);
    return TSDB_CODE_SUCCESS;
  }

  if (0 != taosHashPut(job->execTasks, &task->taskId, sizeof(task->taskId), &task, POINTER_BYTES)) {
    qError("taosHashPut failed");
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  *moved = true;
  
  return TSDB_CODE_SUCCESS;
}


D
dapan 已提交
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
int32_t schAsyncSendMsg(SQueryJob *job, SQueryTask *task, int32_t msgType) {
  int32_t msgSize = 0;
  void *msg = NULL;
  
  switch (msgType) {
    case TSDB_MSG_TYPE_QUERY: {
      if (NULL == task->msg) {
        qError("query msg is NULL");
        SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
      }

      int32_t len = strlen(task->msg);
      msgSize = sizeof(SSchedulerQueryMsg) + len;
      msg = calloc(1, msgSize);
      if (NULL == msg) {
        qError("calloc %d failed", msgSize);
        SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      SSchedulerQueryMsg *pMsg = msg;
D
dapan1121 已提交
307
      
D
dapan1121 已提交
308 309 310 311
      pMsg->schedulerId = htobe64(schMgmt.schedulerId);
      pMsg->queryId = htobe64(job->queryId);
      pMsg->taskId = htobe64(task->taskId);
      pMsg->contentLen = htonl(len);
D
dapan 已提交
312 313 314
      memcpy(pMsg->msg, task->msg, len);
      break;
    }
D
dapan1121 已提交
315
    case TSDB_MSG_TYPE_RES_READY: {
D
dapan 已提交
316 317 318 319 320 321 322 323
      msgSize = sizeof(SSchedulerReadyMsg);
      msg = calloc(1, msgSize);
      if (NULL == msg) {
        qError("calloc %d failed", msgSize);
        SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      SSchedulerReadyMsg *pMsg = msg;
D
dapan1121 已提交
324 325
      pMsg->queryId = htobe64(job->queryId);
      pMsg->taskId = htobe64(task->taskId);      
D
dapan 已提交
326 327 328 329 330 331 332 333 334 335 336
      break;
    }
    case TSDB_MSG_TYPE_FETCH: {
      msgSize = sizeof(SSchedulerFetchMsg);
      msg = calloc(1, msgSize);
      if (NULL == msg) {
        qError("calloc %d failed", msgSize);
        SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }
    
      SSchedulerFetchMsg *pMsg = msg;
D
dapan1121 已提交
337 338
      pMsg->queryId = htobe64(job->queryId);
      pMsg->taskId = htobe64(task->taskId);      
D
dapan 已提交
339 340 341 342 343 344
      break;
    }
    default:
      qError("unknown msg type:%d", msgType);
      break;
  }
D
dapan1121 已提交
345

D
dapan 已提交
346 347 348
  //TODO SEND MSG

  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
349 350
}

D
dapan 已提交
351
int32_t schTaskCheckAndSetRetry(SQueryJob *job, SQueryTask *task, int32_t errCode, bool *needRetry) {
D
dapan1121 已提交
352 353
  // TODO set retry or not based on task type/errCode/retry times/job status/available eps...
  // TODO if needRetry, set task retry info
D
dapan 已提交
354

D
dapan1121 已提交
355 356 357
  *needRetry = false;

  return TSDB_CODE_SUCCESS;
D
dapan 已提交
358 359
}

D
dapan 已提交
360

D
dapan 已提交
361
int32_t schFetchFromRemote(SQueryJob *job) {
D
dapan 已提交
362 363 364 365 366 367 368 369 370 371 372 373 374
  int32_t code = 0;
  
  if (atomic_val_compare_exchange_32(&job->remoteFetch, 0, 1) != 0) {
    qInfo("prior fetching not finished");
    return TSDB_CODE_SUCCESS;
  }

  SCH_ERR_JRET(schAsyncSendMsg(job, NULL, TSDB_MSG_TYPE_FETCH));

  return TSDB_CODE_SUCCESS;
  
_return:
  atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0);
D
dapan 已提交
375

D
dapan 已提交
376
  return code;
D
dapan 已提交
377 378
}

D
dapan 已提交
379 380

int32_t schProcessOnJobSuccess(SQueryJob *job) {
D
dapan1121 已提交
381
  job->status = JOB_TASK_STATUS_SUCCEED;
D
dapan1121 已提交
382
  
D
dapan 已提交
383 384 385
  if (job->userFetch) {
    SCH_ERR_RET(schFetchFromRemote(job));
  }
D
dapan 已提交
386

D
dapan 已提交
387
  return TSDB_CODE_SUCCESS;
D
dapan 已提交
388 389 390
}

int32_t schProcessOnJobFailure(SQueryJob *job) {
D
dapan1121 已提交
391
  job->status = JOB_TASK_STATUS_FAILED;
D
dapan1121 已提交
392

D
dapan 已提交
393 394 395 396 397
  atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0);

  if (job->userFetch) {
    tsem_post(&job->rspSem);
  }
D
dapan 已提交
398

D
dapan 已提交
399 400 401 402 403 404 405
  return TSDB_CODE_SUCCESS;
}

int32_t schProcessOnDataFetched(SQueryJob *job) {
  atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0);

  tsem_post(&job->rspSem);
D
dapan 已提交
406 407 408 409 410 411 412 413
}


int32_t schProcessOnTaskSuccess(SQueryJob *job, SQueryTask *task) {
  bool moved = false;
  
  SCH_ERR_RET(schMoveTaskToSuccList(job, task, &moved));
  if (!moved) {
D
dapan 已提交
414
    SCH_TASK_ERR_LOG("task may already moved, status:%d", task->status);
D
dapan 已提交
415 416
    return TSDB_CODE_SUCCESS;
  }
D
dapan1121 已提交
417

D
dapan1121 已提交
418
  task->status = JOB_TASK_STATUS_SUCCEED;
D
dapan 已提交
419 420 421 422 423 424 425 426
  
  int32_t parentNum = (int32_t)taosArrayGetSize(task->parents);
  if (parentNum == 0) {
    if (task->plan->level != 0) {
      qError("level error");
      SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
    }

D
dapan 已提交
427 428 429
    strncpy(job->resEp.fqdn, task->execAddr.fqdn, sizeof(job->resEp.fqdn));
    job->resEp.port = task->execAddr.port;

D
dapan 已提交
430
    SCH_ERR_RET(schProcessOnJobSuccess(job));
D
dapan 已提交
431 432 433 434

    return TSDB_CODE_SUCCESS;
  }

D
dapan 已提交
435
  if (SCH_IS_DATA_SRC_TASK(task) && job->dataSrcEps.numOfEps < SCH_MAX_CONDIDATE_EP_NUM) {
D
dapan 已提交
436 437 438 439 440 441
    strncpy(job->dataSrcEps.fqdn[job->dataSrcEps.numOfEps], task->execAddr.fqdn, sizeof(task->execAddr.fqdn));
    job->dataSrcEps.port[job->dataSrcEps.numOfEps] = task->execAddr.port;

    ++job->dataSrcEps.numOfEps;
  }

D
dapan 已提交
442 443 444 445 446
  for (int32_t i = 0; i < parentNum; ++i) {
    SQueryTask *par = taosArrayGet(task->parents, i);

    ++par->childReady;

D
dapan 已提交
447
    SCH_ERR_RET(qSetSubplanExecutionNode(par->plan, task->plan->id.templateId, &task->execAddr));
D
dapan 已提交
448 449
    
    if (SCH_TASK_READY_TO_LUNCH(par)) {
D
dapan1121 已提交
450
      SCH_ERR_RET(schLaunchTask(job, task));
D
dapan 已提交
451 452 453 454 455 456 457 458 459 460 461 462 463
    }
  }

  return TSDB_CODE_SUCCESS;
}

int32_t schProcessOnTaskFailure(SQueryJob *job, SQueryTask *task, int32_t errCode) {
  bool needRetry = false;
  SCH_ERR_RET(schTaskCheckAndSetRetry(job, task, errCode, &needRetry));
  
  if (!needRetry) {
    SCH_TASK_ERR_LOG("task failed[%x], no more retry", errCode);
    
D
dapan1121 已提交
464
    job->status = JOB_TASK_STATUS_FAILED;
D
dapan 已提交
465 466 467 468 469
    SCH_ERR_RET(schProcessOnJobFailure(job));

    return TSDB_CODE_SUCCESS;
  }

D
dapan1121 已提交
470
  SCH_ERR_RET(schLaunchTask(job, task));
D
dapan 已提交
471 472 473 474

  return TSDB_CODE_SUCCESS;
}

D
dapan1121 已提交
475 476 477 478 479 480 481 482
int32_t schHandleRspMsg(SQueryJob *job, SQueryTask *task, int32_t msgType, int32_t rspCode) {
  int32_t code = 0;
  
  switch (msgType) {
    case TSDB_MSG_TYPE_QUERY:
      if (rspCode != TSDB_CODE_SUCCESS) {
        SCH_ERR_JRET(schProcessOnTaskFailure(job, task, rspCode));
      } else {
D
dapan1121 已提交
483
        code = schAsyncSendMsg(job, task, TSDB_MSG_TYPE_RES_READY);
D
dapan1121 已提交
484 485 486 487 488
        if (code) {
          goto _task_error;
        }
      }
      break;
D
dapan1121 已提交
489
    case TSDB_MSG_TYPE_RES_READY:
D
dapan1121 已提交
490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
      if (rspCode != TSDB_CODE_SUCCESS) {
        SCH_ERR_JRET(schProcessOnTaskFailure(job, task, rspCode));
      } else {
        code = schProcessOnTaskSuccess(job, task);
        if (code) {
          goto _task_error;
        }        
      }
      break;
    case TSDB_MSG_TYPE_FETCH:
      SCH_ERR_JRET(rspCode);
      SCH_ERR_JRET(schProcessOnDataFetched(job));
      break;
    default:
      qError("unknown msg type:%d received", msgType);
      return TSDB_CODE_QRY_INVALID_INPUT;
  }

  return TSDB_CODE_SUCCESS;

_task_error:
  SCH_ERR_JRET(schProcessOnTaskFailure(job, task, code));
  return TSDB_CODE_SUCCESS;

_return:
  code = schProcessOnJobFailure(job);
  return code;
}

D
dapan 已提交
519

D
dapan1121 已提交
520 521


D
dapan1121 已提交
522
int32_t schLaunchTask(SQueryJob *job, SQueryTask *task) {
D
dapan1121 已提交
523
  SSubplan *plan = task->plan;
524 525
  int32_t len = 0;
  SCH_ERR_RET(qSubPlanToString(plan, &task->msg, &len));
D
dapan 已提交
526
  if (plan->execEpSet.numOfEps <= 0) {
D
dapan1121 已提交
527 528 529 530 531 532
    SCH_ERR_RET(schSetTaskExecEpSet(job, &plan->execEpSet));
  }

  if (plan->execEpSet.numOfEps <= 0) {
    SCH_TASK_ERR_LOG("invalid execEpSet num:%d", plan->execEpSet.numOfEps);
    SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
D
dapan1121 已提交
533
  }
D
dapan 已提交
534
  
D
dapan 已提交
535
  SCH_ERR_RET(schAsyncSendMsg(job, task, TSDB_MSG_TYPE_QUERY));
D
dapan 已提交
536

D
dapan 已提交
537
  SCH_ERR_RET(schPushTaskToExecList(job, task));
D
dapan1121 已提交
538

D
dapan1121 已提交
539
  task->status = JOB_TASK_STATUS_EXECUTING;
D
dapan1121 已提交
540

D
dapan1121 已提交
541 542 543
  return TSDB_CODE_SUCCESS;
}

D
dapan1121 已提交
544
int32_t schLaunchJob(SQueryJob *job) {
D
dapan 已提交
545 546 547
  SQueryLevel *level = taosArrayGet(job->levels, job->levelIdx);
  for (int32_t i = 0; i < level->taskNum; ++i) {
    SQueryTask *task = taosArrayGet(level->subTasks, i);
D
dapan1121 已提交
548
    SCH_ERR_RET(schLaunchTask(job, task));
D
dapan1121 已提交
549
  }
D
dapan1121 已提交
550

D
dapan1121 已提交
551
  job->status = JOB_TASK_STATUS_EXECUTING;
D
dapan 已提交
552
  
D
dapan1121 已提交
553
  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
554 555
}

556 557

int32_t schedulerInit(SSchedulerCfg *cfg) {
D
dapan 已提交
558 559
  if (cfg) {
    schMgmt.cfg = *cfg;
D
dapan1121 已提交
560 561
  } else {
    schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_JOB_NUMBER;
D
dapan 已提交
562 563
  }

D
dapan1121 已提交
564 565 566 567 568 569 570
  schMgmt.Jobs = taosHashInit(schMgmt.cfg.maxJobNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
  if (NULL == schMgmt.Jobs) {
    SCH_ERR_LRET(TSDB_CODE_QRY_OUT_OF_MEMORY, "init %d schduler jobs failed", schMgmt.cfg.maxJobNum);
  }

  schMgmt.schedulerId = 1; //TODO GENERATE A UUID
  
571 572 573 574
  return TSDB_CODE_SUCCESS;
}


D
dapan1121 已提交
575 576
int32_t scheduleExecJob(void *transport, SArray *qnodeList, SQueryDag* pDag, void** pJob) {
  if (NULL == transport || NULL == transport ||NULL == pDag || NULL == pDag->pSubplans || NULL == pJob) {
D
dapan1121 已提交
577
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
578 579
  }

D
dapan1121 已提交
580 581 582 583
  if (taosArrayGetSize(qnodeList) <= 0) {
    qInfo("qnodeList is empty");
  }

D
dapan1121 已提交
584
  int32_t code = 0;
585 586
  SQueryJob *job = calloc(1, sizeof(SQueryJob));
  if (NULL == job) {
D
dapan1121 已提交
587
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
588 589
  }

D
dapan1121 已提交
590 591
  job->transport = transport;
  job->qnodeList = qnodeList;
D
dapan 已提交
592

D
dapan1121 已提交
593
  SCH_ERR_JRET(schValidateAndBuildJob(pDag, job));
D
dapan1121 已提交
594

D
dapan 已提交
595 596 597 598 599 600 601 602 603 604 605
  job->execTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
  if (NULL == job->execTasks) {
    qError("taosHashInit %d failed", pDag->numOfSubplans);
    SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  job->succTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
  if (NULL == job->succTasks) {
    qError("taosHashInit %d failed", pDag->numOfSubplans);
    SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
D
dapan 已提交
606 607

  tsem_init(&job->rspSem, 0, 0);
D
dapan 已提交
608
  
D
dapan1121 已提交
609 610 611 612 613
  if (0 != taosHashPut(schMgmt.Jobs, &job->queryId, sizeof(job->queryId), &job, POINTER_BYTES)) {
    qError("taosHashPut queryId:%"PRIx64" failed", job->queryId);
    SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR);
  }

D
dapan1121 已提交
614
  job->status = JOB_TASK_STATUS_NOT_START;
D
dapan1121 已提交
615
  
D
dapan1121 已提交
616
  SCH_ERR_JRET(schLaunchJob(job));
617 618

  *(SQueryJob **)pJob = job;
D
dapan1121 已提交
619

D
dapan1121 已提交
620
  return TSDB_CODE_SUCCESS;
621

D
dapan1121 已提交
622
_return:
623

D
dapan1121 已提交
624 625 626 627
  *(SQueryJob **)pJob = NULL;
  scheduleFreeJob(job);
  
  SCH_RET(code);
628
}
D
dapan1121 已提交
629

D
dapan1121 已提交
630 631
int32_t scheduleFetchRows(void *pJob, void **data) {
  if (NULL == pJob || NULL == data) {
D
dapan 已提交
632 633 634 635
    return TSDB_CODE_QRY_INVALID_INPUT;
  }

  SQueryJob *job = pJob;
D
dapan 已提交
636
  int32_t code = 0;
D
dapan 已提交
637 638 639 640 641 642

  if (atomic_val_compare_exchange_32(&job->userFetch, 0, 1) != 0) {
    qError("prior fetching not finished");
    return TSDB_CODE_QRY_APP_ERROR;
  }

D
dapan1121 已提交
643
  if (job->status == JOB_TASK_STATUS_SUCCEED) {
D
dapan 已提交
644 645
    SCH_ERR_JRET(schFetchFromRemote(job));
  }
D
dapan 已提交
646 647 648 649

  tsem_wait(&job->rspSem);

  *data = job->res;
D
dapan 已提交
650
  job->res = NULL;
D
dapan 已提交
651

D
dapan 已提交
652 653 654 655
_return:
  atomic_val_compare_exchange_32(&job->userFetch, 1, 0);

  return code;
D
dapan 已提交
656
}
D
dapan1121 已提交
657

D
dapan1121 已提交
658 659
int32_t scheduleCancelJob(void *pJob) {
  //TODO
D
dapan1121 已提交
660

D
dapan1121 已提交
661 662 663 664 665
  return TSDB_CODE_SUCCESS;
}

void scheduleFreeJob(void *pJob) {
  if (NULL == pJob) {
D
dapan 已提交
666 667
    return;
  }
D
dapan1121 已提交
668

D
dapan1121 已提交
669 670 671 672 673 674 675 676
  SQueryJob *job = pJob;

  if (job->status > 0) {
    if (0 != taosHashRemove(schMgmt.Jobs, &job->queryId, sizeof(job->queryId))) {
      qError("remove job:%"PRIx64"from mgmt failed", job->queryId); // maybe already freed
      return;
    }

D
dapan1121 已提交
677
    if (job->status == JOB_TASK_STATUS_EXECUTING) {
D
dapan1121 已提交
678 679 680 681 682
      scheduleCancelJob(pJob);
    }
  }
  
  //TODO free job
D
dapan1121 已提交
683 684
}

685 686
void schedulerDestroy(void) {
  if (schMgmt.Jobs) {
D
dapan1121 已提交
687
    taosHashCleanup(schMgmt.Jobs); //TODO
688 689 690 691
    schMgmt.Jobs = NULL;
  }
}

D
dapan1121 已提交
692