scheduler.c 18.6 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14 15
 */

D
dapan1121 已提交
16 17
#include "schedulerInt.h"
#include "taosmsg.h"
18
#include "query.h"
D
dapan 已提交
19
#include "catalog.h"
20 21

SSchedulerMgmt schMgmt = {0};
D
dapan1121 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53


int32_t schBuildAndSendRequest(void *pRpc, const SEpSet* pMgmtEps, __taos_async_fn_t fp) {
/*
  SRequestObj *pRequest = createRequest(pTscObj, fp, param, TSDB_SQL_CONNECT);
  if (pRequest == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

  SRequestMsgBody body = {0};
  buildConnectMsg(pRequest, &body);

  int64_t transporterId = 0;
  sendMsgToServer(pTscObj->pTransporter, &pTscObj->pAppInfo->mgmtEp.epSet, &body, &transporterId);

  tsem_wait(&pRequest->body.rspSem);
  destroyConnectMsg(&body);

  if (pRequest->code != TSDB_CODE_SUCCESS) {
    const char *errorMsg = (pRequest->code == TSDB_CODE_RPC_FQDN_ERROR) ? taos_errstr(pRequest) : tstrerror(terrno);
    printf("failed to connect to server, reason: %s\n\n", errorMsg);

    destroyRequest(pRequest);
    taos_close(pTscObj);
    pTscObj = NULL;
  } else {
    tscDebug("0x%"PRIx64" connection is opening, connId:%d, dnodeConn:%p", pTscObj->id, pTscObj->connId, pTscObj->pTransporter);
    destroyRequest(pRequest);
  }
*/  
}

D
dapan1121 已提交
54 55 56
int32_t schBuildTaskRalation(SQueryJob *job, SHashObj *planToTask) {
  for (int32_t i = 0; i < job->levelNum; ++i) {
    SQueryLevel *level = taosArrayGet(job->levels, i);
D
dapan 已提交
57
    
D
dapan1121 已提交
58 59 60
    for (int32_t m = 0; m < level->taskNum; ++m) {
      SQueryTask *task = taosArrayGet(level->subTasks, m);
      SSubplan *plan = task->plan;
D
dapan1121 已提交
61 62
      int32_t childNum = plan->pChildern ? (int32_t)taosArrayGetSize(plan->pChildern) : 0;
      int32_t parentNum = plan->pParents ? (int32_t)taosArrayGetSize(plan->pParents) : 0;
D
dapan1121 已提交
63 64

      if (childNum > 0) {
D
dapan 已提交
65 66
        task->children = taosArrayInit(childNum, POINTER_BYTES);
        if (NULL == task->children) {
D
dapan1121 已提交
67 68 69 70 71 72 73 74 75 76 77 78 79
          qError("taosArrayInit %d failed", childNum);
          SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      for (int32_t n = 0; n < childNum; ++n) {
        SSubplan *child = taosArrayGet(plan->pChildern, n);
        SQueryTask *childTask = taosHashGet(planToTask, &child, POINTER_BYTES);
        if (childTask) {
          qError("subplan relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n);
          SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
        }

D
dapan 已提交
80
        if (NULL == taosArrayPush(task->children, &childTask)) {
D
dapan1121 已提交
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
          qError("taosArrayPush failed");
          SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      if (parentNum > 0) {
        task->parents = taosArrayInit(parentNum, POINTER_BYTES);
        if (NULL == task->parents) {
          qError("taosArrayInit %d failed", parentNum);
          SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      for (int32_t n = 0; n < parentNum; ++n) {
        SSubplan *parent = taosArrayGet(plan->pParents, n);
        SQueryTask *parentTask = taosHashGet(planToTask, &parent, POINTER_BYTES);
        if (parentTask) {
          qError("subplan relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n);
          SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
        }

        if (NULL == taosArrayPush(task->parents, &parentTask)) {
          qError("taosArrayPush failed");
          SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }      
    }
  }

D
dapan 已提交
110 111 112 113 114 115 116 117 118 119 120 121 122
  SQueryLevel *level = taosArrayGet(job->levels, 0);
  if (level->taskNum > 1) {
    qError("invalid plan info, level 0, taskNum:%d", level->taskNum);
    SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
  }

  SQueryTask *task = taosArrayGet(level->subTasks, 0);
  if (task->parents && taosArrayGetSize(task->parents) > 0) {
    qError("invalid plan info, level 0, parentNum:%d", (int32_t)taosArrayGetSize(task->parents));
    SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
  }


D
dapan1121 已提交
123 124 125 126
  return TSDB_CODE_SUCCESS;
}


127
int32_t schValidateAndBuildJob(SQueryDag *dag, SQueryJob *job) {
D
dapan1121 已提交
128
  int32_t code = 0;
D
dapan 已提交
129

D
dapan1121 已提交
130 131
  job->queryId = dag->queryId;
  
D
dapan 已提交
132 133 134 135 136
  if (dag->numOfSubplans <= 0) {
    qError("invalid subplan num:%d", dag->numOfSubplans);
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
  }
  
137 138 139
  int32_t levelNum = (int32_t)taosArrayGetSize(dag->pSubplans);
  if (levelNum <= 0) {
    qError("invalid level num:%d", levelNum);
D
dapan1121 已提交
140
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
141 142
  }

D
dapan1121 已提交
143 144 145 146 147 148
  SHashObj *planToTask = taosHashInit(SCHEDULE_DEFAULT_TASK_NUMBER, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  if (NULL == planToTask) {
    qError("taosHashInit %d failed", SCHEDULE_DEFAULT_TASK_NUMBER);
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
  
149 150 151
  job->levels = taosArrayInit(levelNum, sizeof(SQueryLevel));
  if (NULL == job->levels) {
    qError("taosArrayInit %d failed", levelNum);
D
dapan1121 已提交
152
    SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
153 154 155 156 157 158 159 160 161 162 163
  }

  job->levelNum = levelNum;
  job->levelIdx = levelNum - 1;

  job->subPlans = dag->pSubplans;

  SQueryLevel level = {0};
  SArray *levelPlans = NULL;
  int32_t levelPlanNum = 0;

D
dapan1121 已提交
164 165
  level.status = SCH_STATUS_NOT_START;

166
  for (int32_t i = 0; i < levelNum; ++i) {
D
dapan1121 已提交
167
    level.level = i;
168 169 170
    levelPlans = taosArrayGetP(dag->pSubplans, i);
    if (NULL == levelPlans) {
      qError("no level plans for level %d", i);
D
dapan1121 已提交
171
      SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT);
172 173 174 175 176
    }

    levelPlanNum = (int32_t)taosArrayGetSize(levelPlans);
    if (levelPlanNum <= 0) {
      qError("invalid level plans number:%d, level:%d", levelPlanNum, i);
D
dapan1121 已提交
177
      SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT);
D
dapan1121 已提交
178 179 180 181 182 183 184
    }

    level.taskNum = levelPlanNum;
    
    level.subTasks = taosArrayInit(levelPlanNum, sizeof(SQueryTask));
    if (NULL == level.subTasks) {
      qError("taosArrayInit %d failed", levelPlanNum);
D
dapan1121 已提交
185
      SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
186 187 188
    }
    
    for (int32_t n = 0; n < levelPlanNum; ++n) {
D
dapan1121 已提交
189
      SSubplan *plan = taosArrayGet(levelPlans, n);
D
dapan1121 已提交
190
      SQueryTask task = {0};
D
dapan1121 已提交
191
      
D
dapan1121 已提交
192 193 194
      task.taskId = atomic_add_fetch_64(&schMgmt.taskId, 1);
      task.plan = plan;
      task.status = SCH_STATUS_NOT_START;
D
dapan1121 已提交
195

D
dapan1121 已提交
196 197 198 199 200 201 202
      void *p = taosArrayPush(level.subTasks, &task);
      if (NULL == p) {
        qError("taosArrayPush failed");
        SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }
      
      if (0 != taosHashPut(planToTask, &plan, POINTER_BYTES, &p, POINTER_BYTES)) {
D
dapan1121 已提交
203 204 205
        qError("taosHashPut failed");
        SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }
D
dapan1121 已提交
206
    }
207

D
dapan1121 已提交
208 209 210
    if (NULL == taosArrayPush(job->levels, &level)) {
      qError("taosArrayPush failed");
      SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
211 212 213
    }
  }

D
dapan1121 已提交
214 215 216 217 218 219
  SCH_ERR_JRET(schBuildTaskRalation(job, planToTask));

  if (planToTask) {
    taosHashCleanup(planToTask);
  }
  
220
  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
221 222 223 224 225 226

_return:
  if (level.subTasks) {
    taosArrayDestroy(level.subTasks);
  }

D
dapan1121 已提交
227 228 229 230
  if (planToTask) {
    taosHashCleanup(planToTask);
  }

D
dapan1121 已提交
231
  SCH_RET(code);
232 233
}

D
dapan1121 已提交
234
int32_t schSetTaskExecEpSet(SQueryJob *job, SEpSet *epSet) {  
D
dapan 已提交
235
  if (epSet->numOfEps >= SCH_MAX_CONDIDATE_EP_NUM) {
D
dapan 已提交
236 237 238
    return TSDB_CODE_SUCCESS;
  }

D
dapan1121 已提交
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
  int32_t qnodeNum = taosArrayGetSize(job->qnodeList);
  
  for (int32_t i = 0; i < qnodeNum && epSet->numOfEps < tListLen(epSet->port); ++i) {
    SEpAddr *addr = taosArrayGet(job->qnodeList, i);
    
    strncpy(epSet->fqdn[epSet->numOfEps], addr->fqdn, sizeof(addr->fqdn));
    epSet->port[epSet->numOfEps] = addr->port;
    
    ++epSet->numOfEps;
  }

  for (int32_t i = 0; i < job->dataSrcEps.numOfEps && epSet->numOfEps < tListLen(epSet->port); ++i) {
    strncpy(epSet->fqdn[epSet->numOfEps], job->dataSrcEps.fqdn[i], sizeof(job->dataSrcEps.fqdn[i]));
    epSet->port[epSet->numOfEps] = job->dataSrcEps.port[i];
    
    ++epSet->numOfEps;
D
dapan 已提交
255
  }
D
dapan 已提交
256 257

  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
258
}
D
dapan1121 已提交
259

D
dapan1121 已提交
260

D
dapan 已提交
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
int32_t schPushTaskToExecList(SQueryJob *job, SQueryTask *task) {
  if (0 != taosHashPut(job->execTasks, &task->taskId, sizeof(task->taskId), &task, POINTER_BYTES)) {
    qError("taosHashPut failed");
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  return TSDB_CODE_SUCCESS;
}

int32_t schMoveTaskToSuccList(SQueryJob *job, SQueryTask *task, bool *moved) {
  if (0 != taosHashRemove(job->execTasks, &task->taskId, sizeof(task->taskId))) {
    qWarn("remove task[%"PRIx64"] from execTasks failed", task->taskId);
    return TSDB_CODE_SUCCESS;
  }

  if (0 != taosHashPut(job->execTasks, &task->taskId, sizeof(task->taskId), &task, POINTER_BYTES)) {
    qError("taosHashPut failed");
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  *moved = true;
  
  return TSDB_CODE_SUCCESS;
}


D
dapan 已提交
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
int32_t schAsyncSendMsg(SQueryJob *job, SQueryTask *task, int32_t msgType) {
  int32_t msgSize = 0;
  void *msg = NULL;
  
  switch (msgType) {
    case TSDB_MSG_TYPE_QUERY: {
      if (NULL == task->msg) {
        qError("query msg is NULL");
        SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
      }

      int32_t len = strlen(task->msg);
      msgSize = sizeof(SSchedulerQueryMsg) + len;
      msg = calloc(1, msgSize);
      if (NULL == msg) {
        qError("calloc %d failed", msgSize);
        SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      SSchedulerQueryMsg *pMsg = msg;
      pMsg->queryId = job->queryId;
      pMsg->taskId = task->taskId;
      pMsg->contentLen = len;
      memcpy(pMsg->msg, task->msg, len);
      break;
    }
    case TSDB_MSG_TYPE_RSP_READY: {
      msgSize = sizeof(SSchedulerReadyMsg);
      msg = calloc(1, msgSize);
      if (NULL == msg) {
        qError("calloc %d failed", msgSize);
        SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      SSchedulerReadyMsg *pMsg = msg;
      pMsg->queryId = job->queryId;
      pMsg->taskId = task->taskId;      
      break;
    }
    case TSDB_MSG_TYPE_FETCH: {
      msgSize = sizeof(SSchedulerFetchMsg);
      msg = calloc(1, msgSize);
      if (NULL == msg) {
        qError("calloc %d failed", msgSize);
        SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
      }
    
      SSchedulerFetchMsg *pMsg = msg;
      pMsg->queryId = job->queryId;
      pMsg->taskId = task->taskId;      
      break;
    }
    default:
      qError("unknown msg type:%d", msgType);
      break;
  }
D
dapan1121 已提交
343

D
dapan 已提交
344 345 346
  //TODO SEND MSG

  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
347 348
}

D
dapan 已提交
349
int32_t schTaskCheckAndSetRetry(SQueryJob *job, SQueryTask *task, int32_t errCode, bool *needRetry) {
D
dapan1121 已提交
350 351
  // TODO set retry or not based on task type/errCode/retry times/job status/available eps...
  // TODO if needRetry, set task retry info
D
dapan 已提交
352

D
dapan1121 已提交
353 354 355
  *needRetry = false;

  return TSDB_CODE_SUCCESS;
D
dapan 已提交
356 357
}

D
dapan 已提交
358

D
dapan 已提交
359
int32_t schFetchFromRemote(SQueryJob *job) {
D
dapan 已提交
360 361 362 363 364 365 366 367 368 369 370 371 372
  int32_t code = 0;
  
  if (atomic_val_compare_exchange_32(&job->remoteFetch, 0, 1) != 0) {
    qInfo("prior fetching not finished");
    return TSDB_CODE_SUCCESS;
  }

  SCH_ERR_JRET(schAsyncSendMsg(job, NULL, TSDB_MSG_TYPE_FETCH));

  return TSDB_CODE_SUCCESS;
  
_return:
  atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0);
D
dapan 已提交
373

D
dapan 已提交
374
  return code;
D
dapan 已提交
375 376
}

D
dapan 已提交
377 378

int32_t schProcessOnJobSuccess(SQueryJob *job) {
D
dapan1121 已提交
379 380
  job->status = SCH_STATUS_SUCCEED;
  
D
dapan 已提交
381 382 383
  if (job->userFetch) {
    SCH_ERR_RET(schFetchFromRemote(job));
  }
D
dapan 已提交
384

D
dapan 已提交
385
  return TSDB_CODE_SUCCESS;
D
dapan 已提交
386 387 388
}

int32_t schProcessOnJobFailure(SQueryJob *job) {
D
dapan1121 已提交
389 390
  job->status = SCH_STATUS_FAILED;

D
dapan 已提交
391 392 393 394 395
  atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0);

  if (job->userFetch) {
    tsem_post(&job->rspSem);
  }
D
dapan 已提交
396

D
dapan 已提交
397 398 399 400 401 402 403
  return TSDB_CODE_SUCCESS;
}

int32_t schProcessOnDataFetched(SQueryJob *job) {
  atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0);

  tsem_post(&job->rspSem);
D
dapan 已提交
404 405 406 407 408 409 410 411
}


int32_t schProcessOnTaskSuccess(SQueryJob *job, SQueryTask *task) {
  bool moved = false;
  
  SCH_ERR_RET(schMoveTaskToSuccList(job, task, &moved));
  if (!moved) {
D
dapan 已提交
412
    SCH_TASK_ERR_LOG("task may already moved, status:%d", task->status);
D
dapan 已提交
413 414
    return TSDB_CODE_SUCCESS;
  }
D
dapan1121 已提交
415 416

  task->status = SCH_STATUS_SUCCEED;
D
dapan 已提交
417 418 419 420 421 422 423 424
  
  int32_t parentNum = (int32_t)taosArrayGetSize(task->parents);
  if (parentNum == 0) {
    if (task->plan->level != 0) {
      qError("level error");
      SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
    }

D
dapan 已提交
425 426 427
    strncpy(job->resEp.fqdn, task->execAddr.fqdn, sizeof(job->resEp.fqdn));
    job->resEp.port = task->execAddr.port;

D
dapan 已提交
428
    SCH_ERR_RET(schProcessOnJobSuccess(job));
D
dapan 已提交
429 430 431 432

    return TSDB_CODE_SUCCESS;
  }

D
dapan 已提交
433
  if (SCH_IS_DATA_SRC_TASK(task) && job->dataSrcEps.numOfEps < SCH_MAX_CONDIDATE_EP_NUM) {
D
dapan 已提交
434 435 436 437 438 439
    strncpy(job->dataSrcEps.fqdn[job->dataSrcEps.numOfEps], task->execAddr.fqdn, sizeof(task->execAddr.fqdn));
    job->dataSrcEps.port[job->dataSrcEps.numOfEps] = task->execAddr.port;

    ++job->dataSrcEps.numOfEps;
  }

D
dapan 已提交
440 441 442 443 444
  for (int32_t i = 0; i < parentNum; ++i) {
    SQueryTask *par = taosArrayGet(task->parents, i);

    ++par->childReady;

D
dapan 已提交
445
    SCH_ERR_RET(qSetSubplanExecutionNode(par->plan, task->plan->id.templateId, &task->execAddr));
D
dapan 已提交
446 447
    
    if (SCH_TASK_READY_TO_LUNCH(par)) {
D
dapan1121 已提交
448
      SCH_ERR_RET(schLaunchTask(job, task));
D
dapan 已提交
449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
    }
  }

  return TSDB_CODE_SUCCESS;
}

int32_t schProcessOnTaskFailure(SQueryJob *job, SQueryTask *task, int32_t errCode) {
  bool needRetry = false;
  SCH_ERR_RET(schTaskCheckAndSetRetry(job, task, errCode, &needRetry));
  
  if (!needRetry) {
    SCH_TASK_ERR_LOG("task failed[%x], no more retry", errCode);
    
    job->status = SCH_STATUS_FAILED;
    SCH_ERR_RET(schProcessOnJobFailure(job));

    return TSDB_CODE_SUCCESS;
  }

D
dapan1121 已提交
468
  SCH_ERR_RET(schLaunchTask(job, task));
D
dapan 已提交
469 470 471 472

  return TSDB_CODE_SUCCESS;
}

D
dapan1121 已提交
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
int32_t schHandleRspMsg(SQueryJob *job, SQueryTask *task, int32_t msgType, int32_t rspCode) {
  int32_t code = 0;
  
  switch (msgType) {
    case TSDB_MSG_TYPE_QUERY:
      if (rspCode != TSDB_CODE_SUCCESS) {
        SCH_ERR_JRET(schProcessOnTaskFailure(job, task, rspCode));
      } else {
        code = schAsyncSendMsg(job, task, TSDB_MSG_TYPE_RSP_READY);
        if (code) {
          goto _task_error;
        }
      }
      break;
    case TSDB_MSG_TYPE_RSP_READY:
      if (rspCode != TSDB_CODE_SUCCESS) {
        SCH_ERR_JRET(schProcessOnTaskFailure(job, task, rspCode));
      } else {
        code = schProcessOnTaskSuccess(job, task);
        if (code) {
          goto _task_error;
        }        
      }
      break;
    case TSDB_MSG_TYPE_FETCH:
      SCH_ERR_JRET(rspCode);
      SCH_ERR_JRET(schProcessOnDataFetched(job));
      break;
    default:
      qError("unknown msg type:%d received", msgType);
      return TSDB_CODE_QRY_INVALID_INPUT;
  }

  return TSDB_CODE_SUCCESS;

_task_error:
  SCH_ERR_JRET(schProcessOnTaskFailure(job, task, code));
  return TSDB_CODE_SUCCESS;

_return:
  code = schProcessOnJobFailure(job);
  return code;
}

D
dapan 已提交
517

D
dapan1121 已提交
518 519


D
dapan1121 已提交
520
int32_t schLaunchTask(SQueryJob *job, SQueryTask *task) {
D
dapan1121 已提交
521 522
  SSubplan *plan = task->plan;
  
D
dapan 已提交
523 524
  SCH_ERR_RET(qSubPlanToString(plan, &task->msg));
  if (plan->execEpSet.numOfEps <= 0) {
D
dapan1121 已提交
525 526 527 528 529 530
    SCH_ERR_RET(schSetTaskExecEpSet(job, &plan->execEpSet));
  }

  if (plan->execEpSet.numOfEps <= 0) {
    SCH_TASK_ERR_LOG("invalid execEpSet num:%d", plan->execEpSet.numOfEps);
    SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
D
dapan1121 已提交
531
  }
D
dapan 已提交
532
  
D
dapan 已提交
533
  SCH_ERR_RET(schAsyncSendMsg(job, task, TSDB_MSG_TYPE_QUERY));
D
dapan 已提交
534

D
dapan 已提交
535
  SCH_ERR_RET(schPushTaskToExecList(job, task));
D
dapan1121 已提交
536

D
dapan1121 已提交
537 538
  task->status = SCH_STATUS_EXECUTING;

D
dapan1121 已提交
539 540 541
  return TSDB_CODE_SUCCESS;
}

D
dapan1121 已提交
542
int32_t schLaunchJob(SQueryJob *job) {
D
dapan 已提交
543 544 545
  SQueryLevel *level = taosArrayGet(job->levels, job->levelIdx);
  for (int32_t i = 0; i < level->taskNum; ++i) {
    SQueryTask *task = taosArrayGet(level->subTasks, i);
D
dapan1121 已提交
546
    SCH_ERR_RET(schLaunchTask(job, task));
D
dapan1121 已提交
547
  }
D
dapan1121 已提交
548 549

  job->status = SCH_STATUS_EXECUTING;
D
dapan 已提交
550
  
D
dapan1121 已提交
551
  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
552 553
}

554 555 556 557 558 559 560

int32_t schedulerInit(SSchedulerCfg *cfg) {
  schMgmt.Jobs = taosHashInit(SCHEDULE_DEFAULT_JOB_NUMBER, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
  if (NULL == schMgmt.Jobs) {
    SCH_ERR_LRET(TSDB_CODE_QRY_OUT_OF_MEMORY, "init %d schduler jobs failed", SCHEDULE_DEFAULT_JOB_NUMBER);
  }

D
dapan 已提交
561 562 563 564
  if (cfg) {
    schMgmt.cfg = *cfg;
  }

565 566 567 568
  return TSDB_CODE_SUCCESS;
}


D
dapan1121 已提交
569 570
int32_t scheduleExecJob(void *transport, SArray *qnodeList, SQueryDag* pDag, void** pJob) {
  if (NULL == transport || NULL == transport ||NULL == pDag || NULL == pDag->pSubplans || NULL == pJob) {
D
dapan1121 已提交
571
    SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
572 573
  }

D
dapan1121 已提交
574 575 576 577
  if (taosArrayGetSize(qnodeList) <= 0) {
    qInfo("qnodeList is empty");
  }

D
dapan1121 已提交
578
  int32_t code = 0;
579 580
  SQueryJob *job = calloc(1, sizeof(SQueryJob));
  if (NULL == job) {
D
dapan1121 已提交
581
    SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
582 583
  }

D
dapan1121 已提交
584 585
  job->transport = transport;
  job->qnodeList = qnodeList;
D
dapan 已提交
586

D
dapan1121 已提交
587
  SCH_ERR_JRET(schValidateAndBuildJob(pDag, job));
D
dapan1121 已提交
588

D
dapan 已提交
589 590 591 592 593 594 595 596 597 598 599
  job->execTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
  if (NULL == job->execTasks) {
    qError("taosHashInit %d failed", pDag->numOfSubplans);
    SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  job->succTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
  if (NULL == job->succTasks) {
    qError("taosHashInit %d failed", pDag->numOfSubplans);
    SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
D
dapan 已提交
600 601

  tsem_init(&job->rspSem, 0, 0);
D
dapan 已提交
602
  
D
dapan1121 已提交
603 604 605 606 607 608 609
  if (0 != taosHashPut(schMgmt.Jobs, &job->queryId, sizeof(job->queryId), &job, POINTER_BYTES)) {
    qError("taosHashPut queryId:%"PRIx64" failed", job->queryId);
    SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR);
  }

  job->status = SCH_STATUS_NOT_START;
  
D
dapan1121 已提交
610
  SCH_ERR_JRET(schLaunchJob(job));
611 612

  *(SQueryJob **)pJob = job;
D
dapan1121 已提交
613

D
dapan1121 已提交
614
  return TSDB_CODE_SUCCESS;
615

D
dapan1121 已提交
616
_return:
617

D
dapan1121 已提交
618 619 620 621
  *(SQueryJob **)pJob = NULL;
  scheduleFreeJob(job);
  
  SCH_RET(code);
622
}
D
dapan1121 已提交
623

D
dapan1121 已提交
624 625
int32_t scheduleFetchRows(void *pJob, void **data) {
  if (NULL == pJob || NULL == data) {
D
dapan 已提交
626 627 628 629
    return TSDB_CODE_QRY_INVALID_INPUT;
  }

  SQueryJob *job = pJob;
D
dapan 已提交
630
  int32_t code = 0;
D
dapan 已提交
631 632 633 634 635 636

  if (atomic_val_compare_exchange_32(&job->userFetch, 0, 1) != 0) {
    qError("prior fetching not finished");
    return TSDB_CODE_QRY_APP_ERROR;
  }

D
dapan 已提交
637 638 639
  if (job->status == SCH_STATUS_SUCCEED) {
    SCH_ERR_JRET(schFetchFromRemote(job));
  }
D
dapan 已提交
640 641 642 643

  tsem_wait(&job->rspSem);

  *data = job->res;
D
dapan 已提交
644
  job->res = NULL;
D
dapan 已提交
645

D
dapan 已提交
646 647 648 649
_return:
  atomic_val_compare_exchange_32(&job->userFetch, 1, 0);

  return code;
D
dapan 已提交
650
}
D
dapan1121 已提交
651

D
dapan1121 已提交
652 653
int32_t scheduleCancelJob(void *pJob) {
  //TODO
D
dapan1121 已提交
654

D
dapan1121 已提交
655 656 657 658 659
  return TSDB_CODE_SUCCESS;
}

void scheduleFreeJob(void *pJob) {
  if (NULL == pJob) {
D
dapan 已提交
660 661
    return;
  }
D
dapan1121 已提交
662

D
dapan1121 已提交
663 664 665 666 667 668 669 670 671 672 673 674 675 676
  SQueryJob *job = pJob;

  if (job->status > 0) {
    if (0 != taosHashRemove(schMgmt.Jobs, &job->queryId, sizeof(job->queryId))) {
      qError("remove job:%"PRIx64"from mgmt failed", job->queryId); // maybe already freed
      return;
    }

    if (job->status == SCH_STATUS_EXECUTING) {
      scheduleCancelJob(pJob);
    }
  }
  
  //TODO free job
D
dapan1121 已提交
677 678
}

679 680
void schedulerDestroy(void) {
  if (schMgmt.Jobs) {
D
dapan1121 已提交
681
    taosHashCleanup(schMgmt.Jobs); //TODO
682 683 684 685
    schMgmt.Jobs = NULL;
  }
}

D
dapan1121 已提交
686