mndScheduler.c 23.8 KB
Newer Older
L
Liu Jicong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "mndScheduler.h"
#include "mndDb.h"
L
Liu Jicong 已提交
18
#include "mndSnode.h"
L
Liu Jicong 已提交
19
#include "mndVgroup.h"
X
Xiaoyu Wang 已提交
20
#include "parser.h"
L
Liu Jicong 已提交
21 22
#include "tcompare.h"
#include "tname.h"
L
Liu Jicong 已提交
23 24
#include "tuuid.h"

25
#define SINK_NODE_LEVEL (0)
L
Liu Jicong 已提交
26
extern bool tsDeployOnSnode;
L
Liu Jicong 已提交
27

28
static int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream);
Y
yihaoDeng 已提交
29 30 31
static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId,
                                      SVgObj* pVgroup, int32_t fillHistory);
static void    setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask);
32

33
int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType,
34
                           int64_t watermark, int64_t deleteMark) {
L
Liu Jicong 已提交
35 36 37 38 39 40 41 42 43
  SNode*      pAst = NULL;
  SQueryPlan* pPlan = NULL;
  terrno = TSDB_CODE_SUCCESS;

  if (nodesStringToNode(ast, &pAst) < 0) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    goto END;
  }

44
  if (qSetSTableIdForRsma(pAst, uid) < 0) {
X
Xiaoyu Wang 已提交
45 46 47 48
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    goto END;
  }

L
Liu Jicong 已提交
49 50 51 52 53
  SPlanContext cxt = {
      .pAstRoot = pAst,
      .topicQuery = false,
      .streamQuery = true,
      .rSmaQuery = true,
L
Liu Jicong 已提交
54
      .triggerType = triggerType,
L
Liu Jicong 已提交
55
      .watermark = watermark,
56
      .deleteMark = deleteMark,
L
Liu Jicong 已提交
57
  };
L
Liu Jicong 已提交
58

L
Liu Jicong 已提交
59 60 61 62 63 64 65 66 67 68
  if (qCreateQueryPlan(&cxt, &pPlan, NULL) < 0) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    goto END;
  }

  int32_t levelNum = LIST_LENGTH(pPlan->pSubplans);
  if (levelNum != 1) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    goto END;
  }
69
  SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
L
Liu Jicong 已提交
70 71 72 73 74 75 76

  int32_t opNum = LIST_LENGTH(inner->pNodeList);
  if (opNum != 1) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    goto END;
  }

77 78
  SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
  if (qSubPlanToString(plan, pDst, pDstLen) < 0) {
L
Liu Jicong 已提交
79 80 81 82 83 84
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    goto END;
  }

END:
  if (pAst) nodesDestroyNode(pAst);
85
  if (pPlan) nodesDestroyNode((SNode*)pPlan);
L
Liu Jicong 已提交
86 87 88
  return terrno;
}

89
int32_t mndSetSinkTaskInfo(SStreamObj* pStream, SStreamTask* pTask) {
L
Liu Jicong 已提交
90
  if (pStream->smaId != 0) {
91
    pTask->outputInfo.type = TASK_OUTPUT__SMA;
L
Liu Jicong 已提交
92 93
    pTask->smaSink.smaId = pStream->smaId;
  } else {
94
    pTask->outputInfo.type = TASK_OUTPUT__TABLE;
L
Liu Jicong 已提交
95 96 97
    pTask->tbSink.stbUid = pStream->targetStbUid;
    memcpy(pTask->tbSink.stbFullName, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN);
    pTask->tbSink.pSchemaWrapper = tCloneSSchemaWrapper(&pStream->outputSchema);
98 99 100
    if (pTask->tbSink.pSchemaWrapper == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
L
Liu Jicong 已提交
101
  }
102

L
Liu Jicong 已提交
103 104 105
  return 0;
}

Y
yihaoDeng 已提交
106 107
int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SArray* pSinkNodeList,
                                        SStreamTask* pTask) {
L
Liu Jicong 已提交
108 109
  bool isShuffle = false;

L
Liu Jicong 已提交
110 111
  if (pStream->fixedSinkVgId == 0) {
    SDbObj* pDb = mndAcquireDb(pMnode, pStream->targetDb);
wmmhello's avatar
wmmhello 已提交
112
    if (pDb != NULL && pDb->cfg.numOfVgroups > 1) {
L
Liu Jicong 已提交
113
      isShuffle = true;
114
      pTask->outputInfo.type = TASK_OUTPUT__SHUFFLE_DISPATCH;
115
      pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
L
Liu Jicong 已提交
116 117 118
      if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) {
        return -1;
      }
119
    }
L
Liu Jicong 已提交
120

121
    sdbRelease(pMnode->pSdb, pDb);
L
Liu Jicong 已提交
122
  }
123

124 125
  int32_t numOfSinkNodes = taosArrayGetSize(pSinkNodeList);

L
Liu Jicong 已提交
126
  if (isShuffle) {
L
Liu Jicong 已提交
127
    memcpy(pTask->shuffleDispatcher.stbFullName, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN);
128
    SArray* pVgs = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
129 130 131

    int32_t numOfVgroups = taosArrayGetSize(pVgs);
    for (int32_t i = 0; i < numOfVgroups; i++) {
132
      SVgroupInfo* pVgInfo = taosArrayGet(pVgs, i);
133 134

      for (int32_t j = 0; j < numOfSinkNodes; j++) {
135
        SStreamTask* pSinkTask = taosArrayGetP(pSinkNodeList, j);
136
        if (pSinkTask->info.nodeId == pVgInfo->vgId) {
137
          pVgInfo->taskId = pSinkTask->id.taskId;
138
          break;
L
Liu Jicong 已提交
139 140 141
        }
      }
    }
142
  } else {
143 144
    SStreamTask* pOneSinkTask = taosArrayGetP(pSinkNodeList, 0);
    setFixedDownstreamEpInfo(pTask, pOneSinkTask);
L
Liu Jicong 已提交
145
  }
146

L
Liu Jicong 已提交
147 148 149
  return 0;
}

150
int32_t mndAssignStreamTaskToVgroup(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SVgObj* pVgroup) {
L
Liu Jicong 已提交
151
  int32_t msgLen;
152

153 154
  pTask->info.nodeId = pVgroup->vgId;
  pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup);
L
Liu Jicong 已提交
155

156 157
  plan->execNode.nodeId = pTask->info.nodeId;
  plan->execNode.epSet = pTask->info.epSet;
L
Liu Jicong 已提交
158
  if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) {
L
Liu Jicong 已提交
159 160 161
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }
162

L
Liu Jicong 已提交
163 164 165
  return 0;
}

L
Liu Jicong 已提交
166
SSnodeObj* mndSchedFetchOneSnode(SMnode* pMnode) {
L
Liu Jicong 已提交
167
  SSnodeObj* pObj = NULL;
L
Liu Jicong 已提交
168 169 170
  void*      pIter = NULL;
  // TODO random fetch
  pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void**)&pObj);
171
  sdbCancelFetch(pMnode->pSdb, pIter);
L
Liu Jicong 已提交
172 173 174
  return pObj;
}

175
int32_t mndAssignStreamTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SSnodeObj* pSnode) {
L
Liu Jicong 已提交
176
  int32_t msgLen;
L
Liu Jicong 已提交
177

178 179
  pTask->info.nodeId = SNODE_HANDLE;
  pTask->info.epSet = mndAcquireEpFromSnode(pMnode, pSnode);
L
Liu Jicong 已提交
180

L
Liu Jicong 已提交
181
  plan->execNode.nodeId = SNODE_HANDLE;
182
  plan->execNode.epSet = pTask->info.epSet;
183
  mDebug("s-task:0x%x set the agg task to snode:%d", pTask->id.taskId, SNODE_HANDLE);
L
Liu Jicong 已提交
184

L
Liu Jicong 已提交
185
  if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) {
L
Liu Jicong 已提交
186 187 188
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }
L
Liu Jicong 已提交
189 190 191
  return 0;
}

192
// todo random choose a node to do compute
L
Liu Jicong 已提交
193 194 195 196 197 198 199 200 201 202
SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) {
  void*   pIter = NULL;
  SVgObj* pVgroup = NULL;
  while (1) {
    pIter = sdbFetch(pMnode->pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
    if (pIter == NULL) break;
    if (pVgroup->dbUid != dbUid) {
      sdbRelease(pMnode->pSdb, pVgroup);
      continue;
    }
203
    sdbCancelFetch(pMnode->pSdb, pIter);
L
Liu Jicong 已提交
204 205 206 207 208
    return pVgroup;
  }
  return pVgroup;
}

209
// create sink node for each vgroup.
210
int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, int32_t fillHistory) {
Y
yihaoDeng 已提交
211 212
  SSdb* pSdb = pMnode->pSdb;
  void* pIter = NULL;
L
Liu Jicong 已提交
213 214

  while (1) {
215
    SVgObj* pVgroup = NULL;
L
Liu Jicong 已提交
216
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
217 218 219 220
    if (pIter == NULL) {
      break;
    }

S
Shengliang Guan 已提交
221
    if (!mndVgroupInDb(pVgroup, pStream->targetDbUid)) {
L
Liu Jicong 已提交
222 223 224
      sdbRelease(pSdb, pVgroup);
      continue;
    }
S
Shengliang Guan 已提交
225

226
    mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, fillHistory);
227
    sdbRelease(pSdb, pVgroup);
L
Liu Jicong 已提交
228
  }
229

L
Liu Jicong 已提交
230 231 232
  return 0;
}

Y
yihaoDeng 已提交
233 234
int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup,
                               int32_t fillHistory) {
H
Haojun Liao 已提交
235 236
  int64_t uid = (fillHistory == 0)? pStream->uid:pStream->hTaskUid;
  SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SINK, fillHistory, 0, pTaskList);
L
Liu Jicong 已提交
237 238 239 240 241
  if (pTask == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

242 243
  pTask->info.nodeId = vgId;
  pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup);
244 245 246
  mndSetSinkTaskInfo(pStream, pTask);
  return 0;
}
L
Liu Jicong 已提交
247

248 249
static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList,
                                   SStreamObj* pStream, SSubplan* plan, uint64_t uid, int8_t fillHistory,
250
                                   bool hasExtraSink, int64_t firstWindowSkey) {
251
  SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList);
252 253 254 255
  if (pTask == NULL) {
    return terrno;
  }

256
  // todo set the correct ts, which should be last key of queried table.
H
Haojun Liao 已提交
257
  STimeWindow* pWindow = &pTask->dataRange.window;
258

H
Haojun Liao 已提交
259 260 261
  pWindow->skey = INT64_MIN;
  pWindow->ekey = firstWindowSkey - 1;
  mDebug("add source task 0x%x window:%" PRId64 " - %" PRId64, pTask->id.taskId, pWindow->skey, pWindow->ekey);
262

263 264
  // sink or dispatch
  if (hasExtraSink) {
265
    mndAddDispatcherForInternalTask(pMnode, pStream, pSinkTaskList, pTask);
L
Liu Jicong 已提交
266
  } else {
267
    mndSetSinkTaskInfo(pStream, pTask);
L
Liu Jicong 已提交
268
  }
L
Liu Jicong 已提交
269

270 271 272 273
  if (mndAssignStreamTaskToVgroup(pMnode, pTask, plan, pVgroup) < 0) {
    return terrno;
  }

274 275 276 277 278
  for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) {
    SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i);
    setTaskUpstreamEpInfo(pTask, pSinkTask);
  }

279
  return TSDB_CODE_SUCCESS;
L
Liu Jicong 已提交
280 281
}

282
static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) {
283 284 285 286 287 288
  SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo));
  if (pEpInfo == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

289 290 291
  pEpInfo->childId = pTask->info.selfChildId;
  pEpInfo->epSet = pTask->info.epSet;
  pEpInfo->nodeId = pTask->info.nodeId;
292 293 294 295 296 297 298 299
  pEpInfo->taskId = pTask->id.taskId;

  return pEpInfo;
}

void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) {
  STaskDispatcherFixedEp* pDispatcher = &pDstTask->fixedEpDispatcher;
  pDispatcher->taskId = pTask->id.taskId;
300 301
  pDispatcher->nodeId = pTask->info.nodeId;
  pDispatcher->epSet = pTask->info.epSet;
302

303
  pDstTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH;
304
  pDstTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
305 306
}

307
int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream) {
308 309 310 311 312
  SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask);
  if (pEpInfo == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

Y
yihaoDeng 已提交
313
  if (pDownstream->pUpstreamEpInfoList == NULL) {
314
    pDownstream->pUpstreamEpInfoList = taosArrayInit(4, POINTER_BYTES);
315
  }
Y
yihaoDeng 已提交
316

317
  taosArrayPush(pDownstream->pUpstreamEpInfoList, &pEpInfo);
318 319
  return TSDB_CODE_SUCCESS;
}
320

321 322 323 324 325 326 327 328
static SArray* addNewTaskList(SArray* pTasksList) {
  SArray* pTaskList = taosArrayInit(0, POINTER_BYTES);
  taosArrayPush(pTasksList, &pTaskList);
  return pTaskList;
}

// set the history task id
static void setHTasksId(SArray* pTaskList, const SArray* pHTaskList) {
Y
yihaoDeng 已提交
329
  for (int32_t i = 0; i < taosArrayGetSize(pTaskList); ++i) {
330 331
    SStreamTask** pStreamTask = taosArrayGet(pTaskList, i);
    SStreamTask** pHTask = taosArrayGet(pHTaskList, i);
332

333 334
    (*pStreamTask)->historyTaskId.taskId = (*pHTask)->id.taskId;
    (*pStreamTask)->historyTaskId.streamId = (*pHTask)->id.streamId;
H
Haojun Liao 已提交
335 336 337 338

    (*pHTask)->streamTaskId.taskId = (*pStreamTask)->id.taskId;
    (*pHTask)->streamTaskId.streamId = (*pStreamTask)->id.streamId;

H
Haojun Liao 已提交
339 340
    mDebug("s-task:0x%" PRIx64 "-0x%x related history task:0x%" PRIx64 "-0x%x, level:%d", (*pStreamTask)->id.streamId,
           (*pStreamTask)->id.taskId, (*pHTask)->id.streamId, (*pHTask)->id.taskId, (*pHTask)->info.taskLevel);
341 342 343
  }
}

344
static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* pPlan, SStreamObj* pStream,
H
Haojun Liao 已提交
345
                                               bool hasExtraSink, int64_t nextWindowSkey) {
346 347
  // create exec stream task, since only one level, the exec task is also the source task
  SArray* pTaskList = addNewTaskList(pStream->tasks);
H
Haojun Liao 已提交
348
  SSdb*   pSdb = pMnode->pSdb;
349 350 351 352 353 354 355 356 357 358 359

  SArray* pHTaskList = NULL;
  if (pStream->conf.fillHistory) {
    pHTaskList = addNewTaskList(pStream->pHTasksList);
  }

  SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
  if (LIST_LENGTH(inner->pNodeList) != 1) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }
L
Liu Jicong 已提交
360

361 362
  SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
  if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
L
Liu Jicong 已提交
363 364 365
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }
366

367 368 369 370 371 372 373
  void* pIter = NULL;
  while (1) {
    SVgObj* pVgroup;
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
    if (pIter == NULL) {
      break;
    }
374

375 376 377 378
    if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
      sdbRelease(pSdb, pVgroup);
      continue;
    }
379

380 381
    // new stream task
    SArray** pSinkTaskList = taosArrayGet(pStream->tasks, SINK_NODE_LEVEL);
382
    int32_t  code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, 0,
H
Haojun Liao 已提交
383
                                        hasExtraSink, nextWindowSkey);
384 385 386 387 388 389 390
    if (code != TSDB_CODE_SUCCESS) {
      sdbRelease(pSdb, pVgroup);
      return -1;
    }

    if (pStream->conf.fillHistory) {
      SArray** pHSinkTaskList = taosArrayGet(pStream->pHTasksList, SINK_NODE_LEVEL);
391
      code = addSourceStreamTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid,
H
Haojun Liao 已提交
392
                                 1, hasExtraSink, nextWindowSkey);
393 394 395 396 397
    }

    sdbRelease(pSdb, pVgroup);
    if (code != TSDB_CODE_SUCCESS) {
      return -1;
L
Liu Jicong 已提交
398 399
    }
  }
400

H
Haojun Liao 已提交
401 402 403 404
  if (pStream->conf.fillHistory) {
    setHTasksId(pTaskList, pHTaskList);
  }

405 406
  return TSDB_CODE_SUCCESS;
}
407

Y
yihaoDeng 已提交
408
static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t uid, SStreamTask* pDownstreamTask,
H
Haojun Liao 已提交
409
                               SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, int64_t nextWindowSkey) {
410 411 412 413 414
  SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, 0, pTaskList);
  if (pTask == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
L
Liu Jicong 已提交
415

416
  // todo set the correct ts, which should be last key of queried table.
H
Haojun Liao 已提交
417 418 419
  STimeWindow* pWindow = &pTask->dataRange.window;
  pWindow->skey = INT64_MIN;
  pWindow->ekey = nextWindowSkey - 1;
420

421
  mDebug("s-task:0x%x level:%d set time window:%" PRId64 " - %" PRId64, pTask->id.taskId, pTask->info.taskLevel,
H
Haojun Liao 已提交
422
         pWindow->skey, pWindow->ekey);
423

424 425 426 427 428
  // all the source tasks dispatch result to a single agg node.
  setFixedDownstreamEpInfo(pTask, pDownstreamTask);
  if (mndAssignStreamTaskToVgroup(pMnode, pTask, pPlan, pVgroup) < 0) {
    return -1;
  }
429

430
  return setTaskUpstreamEpInfo(pTask, pDownstreamTask);
431
}
L
Liu Jicong 已提交
432

Y
yihaoDeng 已提交
433 434
static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream,
                            int32_t fillHistory, SStreamTask** pAggTask) {
435
  *pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList);
436 437 438 439
  if (*pAggTask == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
440

441
  // dispatch
442
  if (mndAddDispatcherForInternalTask(pMnode, pStream, pSinkNodeList, *pAggTask) < 0) {
443 444
    return -1;
  }
L
Liu Jicong 已提交
445

446 447
  return 0;
}
L
Liu Jicong 已提交
448

449 450
static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SStreamTask** pAggTask,
                          SStreamTask** pHAggTask) {
451 452 453
  SArray* pAggTaskList = addNewTaskList(pStream->tasks);
  SSdb*   pSdb = pMnode->pSdb;

454 455
  SNodeListNode* pInnerNode = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
  SSubplan*      plan = (SSubplan*)nodesListGetNode(pInnerNode->pNodeList, 0);
456 457 458 459 460
  if (plan->subplanType != SUBPLAN_TYPE_MERGE) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

461 462
  *pAggTask = NULL;
  SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL);
463

464
  int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, 0, pAggTask);
465 466 467 468 469 470 471 472 473 474 475
  if (code != TSDB_CODE_SUCCESS) {
    return -1;
  }

  SVgObj*    pVgroup = NULL;
  SSnodeObj* pSnode = NULL;

  if (tsDeployOnSnode) {
    pSnode = mndSchedFetchOneSnode(pMnode);
    if (pSnode == NULL) {
      pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
L
Liu Jicong 已提交
476
    }
477 478 479
  } else {
    pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
  }
L
Liu Jicong 已提交
480

481
  if (pSnode != NULL) {
482
    code = mndAssignStreamTaskToSnode(pMnode, *pAggTask, plan, pSnode);
483
  } else {
484
    code = mndAssignStreamTaskToVgroup(pMnode, *pAggTask, plan, pVgroup);
485
  }
486

487 488
  if (pStream->conf.fillHistory) {
    SArray* pHAggTaskList = addNewTaskList(pStream->pHTasksList);
489
    SArray* pHSinkNodeList = taosArrayGetP(pStream->pHTasksList, SINK_NODE_LEVEL);
S
Shengliang Guan 已提交
490

491
    *pHAggTask = NULL;
Y
yihaoDeng 已提交
492 493
    code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pStream->conf.fillHistory,
                        pHAggTask);
494 495 496 497
    if (code != TSDB_CODE_SUCCESS) {
      if (pSnode != NULL) {
        sdbRelease(pSdb, pSnode);
      } else {
498 499
        sdbRelease(pSdb, pVgroup);
      }
500 501
      return code;
    }
L
Liu Jicong 已提交
502

503
    if (pSnode != NULL) {
504
      code = mndAssignStreamTaskToSnode(pMnode, *pHAggTask, plan, pSnode);
505
    } else {
506
      code = mndAssignStreamTaskToVgroup(pMnode, *pHAggTask, plan, pVgroup);
507
    }
L
Liu Jicong 已提交
508

509 510 511 512 513 514 515 516
    setHTasksId(pAggTaskList, pHAggTaskList);
  }

  if (pSnode != NULL) {
    sdbRelease(pSdb, pSnode);
  } else {
    sdbRelease(pSdb, pVgroup);
  }
L
Liu Jicong 已提交
517

518 519 520 521
  return code;
}

static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPlan, SStreamObj* pStream,
H
Haojun Liao 已提交
522
                                                 SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, int64_t nextWindowSkey) {
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550
  SArray* pSourceTaskList = addNewTaskList(pStream->tasks);

  SArray* pHSourceTaskList = NULL;
  if (pStream->conf.fillHistory) {
    pHSourceTaskList = addNewTaskList(pStream->pHTasksList);
  }

  SSdb*          pSdb = pMnode->pSdb;
  SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 1);
  SSubplan*      plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
  if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

  void* pIter = NULL;
  while (1) {
    SVgObj* pVgroup;
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
    if (pIter == NULL) {
      break;
    }

    if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
      sdbRelease(pSdb, pVgroup);
      continue;
    }

H
Haojun Liao 已提交
551 552
    int32_t code =
        doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, nextWindowSkey);
553 554 555 556 557 558 559
    if (code != TSDB_CODE_SUCCESS) {
      sdbRelease(pSdb, pVgroup);
      terrno = code;
      return -1;
    }

    if (pStream->conf.fillHistory) {
H
Haojun Liao 已提交
560 561
      code = doAddSourceTask(pHSourceTaskList, 1, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup,
                             nextWindowSkey);
562
      if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
563
        sdbRelease(pSdb, pVgroup);
564
        return code;
L
Liu Jicong 已提交
565
      }
L
Liu Jicong 已提交
566
    }
H
Haojun Liao 已提交
567 568

    sdbRelease(pSdb, pVgroup);
569 570
  }

571 572 573 574
  if (pStream->conf.fillHistory) {
    setHTasksId(pSourceTaskList, pHSourceTaskList);
  }

575 576 577
  return TSDB_CODE_SUCCESS;
}

Y
yihaoDeng 已提交
578 579
static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStream, SArray** pCreatedTaskList,
                            int32_t fillHistory) {
580 581
  SArray* pSinkTaskList = addNewTaskList(pTasksList);
  if (pStream->fixedSinkVgId == 0) {
582
    if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, fillHistory) < 0) {
583
      // TODO free
L
Liu Jicong 已提交
584 585
      return -1;
    }
586
  } else {
Y
yihaoDeng 已提交
587 588
    if (mndAddSinkTaskToStream(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg,
                               fillHistory) < 0) {
589
      // TODO free
L
Liu Jicong 已提交
590 591
      return -1;
    }
592
  }
L
Liu Jicong 已提交
593

594 595 596
  *pCreatedTaskList = pSinkTaskList;
  return TSDB_CODE_SUCCESS;
}
597

598 599 600 601 602 603 604 605
static void setSinkTaskUpstreamInfo(SArray* pTasksList, const SStreamTask* pUpstreamTask) {
  SArray* pSinkTaskList = taosArrayGetP(pTasksList, SINK_NODE_LEVEL);
  for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) {
    SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i);
    setTaskUpstreamEpInfo(pUpstreamTask, pSinkTask);
  }
}

H
Haojun Liao 已提交
606
static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey) {
Y
yihaoDeng 已提交
607
  SSdb*   pSdb = pMnode->pSdb;
608
  int32_t numOfPlanLevel = LIST_LENGTH(pPlan->pSubplans);
S
Shengliang Guan 已提交
609

610 611 612 613 614 615 616
  bool    hasExtraSink = false;
  bool    externalTargetDB = strcmp(pStream->sourceDb, pStream->targetDb) != 0;
  SDbObj* pDbObj = mndAcquireDb(pMnode, pStream->targetDb);
  if (pDbObj == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }
L
Liu Jicong 已提交
617

618 619 620 621 622 623 624 625 626 627 628
  bool multiTarget = (pDbObj->cfg.numOfVgroups > 1);
  sdbRelease(pSdb, pDbObj);

  pStream->tasks = taosArrayInit(numOfPlanLevel + 1, POINTER_BYTES);
  pStream->pHTasksList = taosArrayInit(numOfPlanLevel + 1, POINTER_BYTES);

  if (numOfPlanLevel == 2 || externalTargetDB || multiTarget || pStream->fixedSinkVgId) {
    // add extra sink
    hasExtraSink = true;

    SArray* pSinkTaskList = NULL;
629
    int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, 0);
630 631 632 633 634 635 636
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // check for fill history
    if (pStream->conf.fillHistory) {
      SArray* pHSinkTaskList = NULL;
H
Haojun Liao 已提交
637
      code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, 1);
638
      if (code != TSDB_CODE_SUCCESS) {
639
        return code;
L
Liu Jicong 已提交
640
      }
641 642

      setHTasksId(pSinkTaskList, pHSinkTaskList);
L
Liu Jicong 已提交
643 644
    }
  }
645

646 647 648
  pStream->totalLevel = numOfPlanLevel + hasExtraSink;

  if (numOfPlanLevel > 1) {
649 650 651 652
    SStreamTask* pAggTask = NULL;
    SStreamTask* pHAggTask = NULL;

    int32_t code = addAggTask(pStream, pMnode, pPlan, &pAggTask, &pHAggTask);
653 654 655 656
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

657 658 659
    setSinkTaskUpstreamInfo(pStream->tasks, pAggTask);
    setSinkTaskUpstreamInfo(pStream->pHTasksList, pHAggTask);

660
    // source level
H
Haojun Liao 已提交
661
    return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, nextWindowSkey);
662
  } else if (numOfPlanLevel == 1) {
H
Haojun Liao 已提交
663
    return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, hasExtraSink, nextWindowSkey);
664 665
  }

L
Liu Jicong 已提交
666 667
  return 0;
}
L
Liu Jicong 已提交
668

H
Haojun Liao 已提交
669
int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindowSkey) {
670 671 672 673 674 675
  SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan);
  if (pPlan == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

H
Haojun Liao 已提交
676
  int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey);
677
  qDestroyQueryPlan(pPlan);
678

679 680 681
  return code;
}

L
Liu Jicong 已提交
682
int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscribeObj* pSub) {
L
Liu Jicong 已提交
683 684
  SSdb*       pSdb = pMnode->pSdb;
  SVgObj*     pVgroup = NULL;
L
Liu Jicong 已提交
685
  SQueryPlan* pPlan = NULL;
686
  SSubplan*   pSubplan = NULL;
L
Liu Jicong 已提交
687

L
Liu Jicong 已提交
688
  if (pTopic->subType == TOPIC_SUB_TYPE__COLUMN) {
L
Liu Jicong 已提交
689 690 691 692 693
    pPlan = qStringToQueryPlan(pTopic->physicalPlan);
    if (pPlan == NULL) {
      terrno = TSDB_CODE_QRY_INVALID_INPUT;
      return -1;
    }
Y
yihaoDeng 已提交
694 695
  } else if (pTopic->subType == TOPIC_SUB_TYPE__TABLE && pTopic->ast != NULL) {
    SNode* pAst = NULL;
696 697 698 699 700 701 702 703 704 705 706 707 708
    if (nodesStringToNode(pTopic->ast, &pAst) != 0) {
      mError("topic:%s, failed to create since %s", pTopic->name, terrstr());
      return -1;
    }

    SPlanContext cxt = {.pAstRoot = pAst, .topicQuery = true};
    if (qCreateQueryPlan(&cxt, &pPlan, NULL) != 0) {
      mError("failed to create topic:%s since %s", pTopic->name, terrstr());
      nodesDestroyNode(pAst);
      return -1;
    }
    nodesDestroyNode(pAst);
  }
L
Liu Jicong 已提交
709

Y
yihaoDeng 已提交
710
  if (pPlan) {
L
Liu Jicong 已提交
711 712 713
    int32_t levelNum = LIST_LENGTH(pPlan->pSubplans);
    if (levelNum != 1) {
      qDestroyQueryPlan(pPlan);
L
Liu Jicong 已提交
714
      terrno = TSDB_CODE_MND_INVALID_TOPIC_QUERY;
L
Liu Jicong 已提交
715 716
      return -1;
    }
L
Liu Jicong 已提交
717

718
    SNodeListNode* pNodeListNode = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
L
Liu Jicong 已提交
719

720
    int32_t opNum = LIST_LENGTH(pNodeListNode->pNodeList);
L
Liu Jicong 已提交
721 722
    if (opNum != 1) {
      qDestroyQueryPlan(pPlan);
L
Liu Jicong 已提交
723
      terrno = TSDB_CODE_MND_INVALID_TOPIC_QUERY;
L
Liu Jicong 已提交
724 725
      return -1;
    }
726 727

    pSubplan = (SSubplan*)nodesListGetNode(pNodeListNode->pNodeList, 0);
L
Liu Jicong 已提交
728
  }
L
Liu Jicong 已提交
729 730 731 732

  void* pIter = NULL;
  while (1) {
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
733 734 735 736
    if (pIter == NULL) {
      break;
    }

S
Shengliang Guan 已提交
737
    if (!mndVgroupInDb(pVgroup, pTopic->dbUid)) {
L
Liu Jicong 已提交
738 739 740 741 742 743
      sdbRelease(pSdb, pVgroup);
      continue;
    }

    pSub->vgNum++;

744
    SMqVgEp* pVgEp = taosMemoryMalloc(sizeof(SMqVgEp));
L
Liu Jicong 已提交
745 746
    pVgEp->epSet = mndGetVgroupEpset(pMnode, pVgroup);
    pVgEp->vgId = pVgroup->vgId;
747
    taosArrayPush(pSub->unassignedVgs, &pVgEp);
748

749
    mDebug("init subscription %s for topic:%s assign vgId:%d", pSub->key, pTopic->name, pVgEp->vgId);
L
Liu Jicong 已提交
750

751 752
    sdbRelease(pSdb, pVgroup);
  }
L
Liu Jicong 已提交
753

754 755
  if (pSubplan) {
    int32_t msgLen;
L
Liu Jicong 已提交
756

757 758 759 760
    if (qSubPlanToString(pSubplan, &pSub->qmsg, &msgLen) < 0) {
      qDestroyQueryPlan(pPlan);
      terrno = TSDB_CODE_QRY_INVALID_INPUT;
      return -1;
L
Liu Jicong 已提交
761
    }
762 763
  } else {
    pSub->qmsg = taosStrdup("");
L
Liu Jicong 已提交
764 765
  }

X
Xiaoyu Wang 已提交
766
  qDestroyQueryPlan(pPlan);
L
Liu Jicong 已提交
767 768
  return 0;
}