You need to sign in or sign up before continuing.
mndScheduler.c 7.4 KB
Newer Older
L
Liu Jicong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "mndScheduler.h"
#include "mndConsumer.h"
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
#include "mndOffset.h"
#include "mndShow.h"
L
Liu Jicong 已提交
23
#include "mndSnode.h"
L
Liu Jicong 已提交
24
#include "mndStb.h"
L
Liu Jicong 已提交
25
#include "mndStream.h"
L
Liu Jicong 已提交
26 27 28 29 30 31 32
#include "mndSubscribe.h"
#include "mndTopic.h"
#include "mndTrans.h"
#include "mndUser.h"
#include "mndVgroup.h"
#include "tcompare.h"
#include "tname.h"
L
Liu Jicong 已提交
33 34
#include "tuuid.h"

L
Liu Jicong 已提交
35 36
extern bool tsStreamSchedV;

L
Liu Jicong 已提交
37
/*
 * Encode a stream task into a deploy message and append it to the
 * transaction as a redo action.
 *
 * pTrans  transaction receiving the redo action
 * pTask   task that is serialized into the message body
 * pEpSet  endpoint set copied into the action
 * type    message type stored in the action
 * nodeId  id written into the message head in network byte order
 *
 * Returns 0 on success and -1 on failure. When the buffer cannot be
 * allocated, terrno is set to TSDB_CODE_OUT_OF_MEMORY.
 */
int32_t mndPersistTaskDeployReq(STrans* pTrans, SStreamTask* pTask, const SEpSet* pEpSet, tmsg_t type, int32_t nodeId) {
  // Sizing pass: encode without a backing buffer and read the resulting position.
  SCoder encoder;
  tCoderInit(&encoder, TD_LITTLE_ENDIAN, NULL, 0, TD_ENCODER);
  tEncodeSStreamTask(&encoder, pTask);
  int32_t size = encoder.pos;
  int32_t tlen = sizeof(SMsgHead) + size;
  tCoderClear(&encoder);

  void* buf = malloc(tlen);
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  ((SMsgHead*)buf)->streamTaskId = htonl(nodeId);

  // Real pass: the encoded task is placed right after the message head.
  void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tCoderInit(&encoder, TD_LITTLE_ENDIAN, abuf, size, TD_ENCODER);
  tEncodeSStreamTask(&encoder, pTask);
  tCoderClear(&encoder);

  STransAction action = {0};
  memcpy(&action.epSet, pEpSet, sizeof(SEpSet));
  action.pCont = buf;
  action.contLen = tlen;
  action.msgType = type;

  // NOTE(review): on success buf stays referenced by the appended action;
  // presumably the transaction owns it from here on - confirm.
  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    // buf was obtained from malloc() above, so it must be released with
    // free(); it is not an rpc-allocated buffer.
    free(buf);
    return -1;
  }
  return 0;
}

/*
 * Bind a subplan to a vgroup, serialize it into the task and queue the
 * task deploy request for that vgroup in the transaction.
 *
 * The plan's exec node is set to the vgroup id and the vgroup's epset, the
 * plan is rendered into pTask->qmsg, and the deploy request is appended with
 * message type TDMT_VND_TASK_DEPLOY.
 *
 * Returns 0 on success and -1 on failure. terrno is set to
 * TSDB_CODE_QRY_INVALID_INPUT when the plan cannot be serialized.
 */
int32_t mndAssignTaskToVg(SMnode* pMnode, STrans* pTrans, SStreamTask* pTask, SSubplan* plan, const SVgObj* pVgroup) {
  int32_t msgLen;
  plan->execNode.nodeId = pVgroup->vgId;
  plan->execNode.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  if (qSubPlanToString(plan, &pTask->qmsg, &msgLen) < 0) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

  // Propagate a failed persist instead of reporting success to the caller.
  if (mndPersistTaskDeployReq(pTrans, pTask, &plan->execNode.epSet, TDMT_VND_TASK_DEPLOY, pVgroup->vgId) < 0) {
    return -1;
  }
  return 0;
}

L
Liu Jicong 已提交
80 81 82 83 84 85
/*
 * Fetch the first snode object found in the sdb.
 *
 * Returns the object written by sdbFetch through its out-parameter, or NULL
 * when no snode was produced. Callers in this file release the result with
 * sdbRelease().
 */
SSnodeObj* mndSchedFetchSnode(SMnode* pMnode) {
  SSnodeObj* pObj = NULL;
  // sdbFetch hands back the iterator as its return value and the object via
  // the out-parameter; the iterator must not be stored into pObj.
  void* pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, NULL, (void**)&pObj);
  // NOTE(review): the iterator is abandoned after one step; check whether the
  // sdb API requires an explicit cancel for a fetch that is not run to the end.
  (void)pIter;
  return pObj;
}

L
Liu Jicong 已提交
86 87
/*
 * Bind a subplan to an snode, serialize it into the task and queue the
 * task deploy request for that snode in the transaction.
 *
 * The plan's exec node is set to the snode id and the epset obtained from
 * mndAcquireEpFromSnode, the plan is rendered into pTask->qmsg, and the
 * deploy request is appended with message type TDMT_SND_TASK_DEPLOY and
 * node id 0.
 *
 * Returns 0 on success and -1 on failure. terrno is set to
 * TSDB_CODE_QRY_INVALID_INPUT when the plan cannot be serialized.
 */
int32_t mndAssignTaskToSnode(SMnode* pMnode, STrans* pTrans, SStreamTask* pTask, SSubplan* plan,
                             const SSnodeObj* pSnode) {
  int32_t msgLen;
  plan->execNode.nodeId = pSnode->id;
  plan->execNode.epSet = mndAcquireEpFromSnode(pMnode, pSnode);

  if (qSubPlanToString(plan, &pTask->qmsg, &msgLen) < 0) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

  // Propagate a failed persist instead of reporting success to the caller.
  if (mndPersistTaskDeployReq(pTrans, pTask, &plan->execNode.epSet, TDMT_SND_TASK_DEPLOY, 0) < 0) {
    return -1;
  }
  return 0;
}

L
Liu Jicong 已提交
100
/*
 * Build the tasks of a stream from its physical plan and queue their deploy
 * requests in the transaction.
 *
 * The plan is parsed from pStream->physicalPlan. Every level of the plan must
 * contain exactly one subplan (asserted).
 *   - Level 0: one task is created for every vgroup whose dbUid matches the
 *     stream's dbUid, and assigned to that vgroup.
 *   - Other levels: a single task is created and assigned either to the last
 *     vgroup used at level 0 (when tsStreamSchedV is set) or to an snode.
 * The tasks of each level are collected into an array that is pushed onto
 * pStream->tasks; pStream->vgNum counts the level-0 vgroups.
 *
 * Returns 0 on success, -1 on failure.
 */
int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) {
  SSdb*       pSdb = pMnode->pSdb;
  SVgObj*     pVgroup = NULL;
  SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan);
  if (pPlan == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }
  ASSERT(pStream->vgNum == 0);

  int32_t totLevel = LIST_LENGTH(pPlan->pSubplans);
  pStream->tasks = taosArrayInit(totLevel, sizeof(SArray));
  int32_t lastUsedVgId = 0;

  // NOTE(review): the early error returns below leave taskOneLevel, the task
  // just created and (inside the vgroup loop) the sdb iterator behind;
  // cleaning those up needs APIs not visible in this file section.
  for (int32_t level = 0; level < totLevel; level++) {
    SArray*        taskOneLevel = taosArrayInit(0, sizeof(SStreamTask));
    SNodeListNode* inner = nodesListGetNode(pPlan->pSubplans, level);
    int32_t        opNum = LIST_LENGTH(inner->pNodeList);
    ASSERT(opNum == 1);

    // The inner list holds exactly one subplan (asserted above), so it lives
    // at index 0 regardless of which level is being processed.
    SSubplan* plan = nodesListGetNode(inner->pNodeList, 0);
    if (level == 0) {
      ASSERT(plan->subplanType == SUBPLAN_TYPE_SCAN);
      void* pIter = NULL;
      while (1) {
        pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
        if (pIter == NULL) break;
        if (pVgroup->dbUid != pStream->dbUid) {
          sdbRelease(pSdb, pVgroup);
          continue;
        }

        lastUsedVgId = pVgroup->vgId;
        pStream->vgNum++;

        // send to vnode
        SStreamTask* pTask = streamTaskNew(pStream->uid, level);
        pTask->pipeSource = 1;
        pTask->pipeSink = level == totLevel - 1 ? 1 : 0;
        pTask->parallelizable = 1;
        // TODO: set to
        if (mndAssignTaskToVg(pMnode, pTrans, pTask, plan, pVgroup) < 0) {
          sdbRelease(pSdb, pVgroup);
          qDestroyQueryPlan(pPlan);
          return -1;
        }
        taosArrayPush(taskOneLevel, pTask);

        // Done with this vgroup: drop the reference taken by sdbFetch, just
        // like the skip and error paths do.
        sdbRelease(pSdb, pVgroup);
      }
    } else {
      SStreamTask* pTask = streamTaskNew(pStream->uid, level);
      pTask->pipeSource = 0;
      pTask->pipeSink = level == totLevel - 1 ? 1 : 0;
      pTask->parallelizable = plan->subplanType == SUBPLAN_TYPE_SCAN;
      pTask->nextOpDst = STREAM_NEXT_OP_DST__VND;

      if (tsStreamSchedV) {
        ASSERT(lastUsedVgId != 0);
        SVgObj* pVg = mndAcquireVgroup(pMnode, lastUsedVgId);
        if (pVg == NULL) {
          // The vgroup could not be acquired; do not dereference it.
          // NOTE(review): terrno is presumably set by mndAcquireVgroup - confirm.
          qDestroyQueryPlan(pPlan);
          return -1;
        }
        if (mndAssignTaskToVg(pMnode, pTrans, pTask, plan, pVg) < 0) {
          sdbRelease(pSdb, pVg);
          qDestroyQueryPlan(pPlan);
          return -1;
        }
        sdbRelease(pSdb, pVg);
      } else {
        SSnodeObj* pSnode = mndSchedFetchSnode(pMnode);
        if (pSnode != NULL) {
          if (mndAssignTaskToSnode(pMnode, pTrans, pTask, plan, pSnode) < 0) {
            sdbRelease(pSdb, pSnode);
            qDestroyQueryPlan(pPlan);
            return -1;
          }
          sdbRelease(pMnode->pSdb, pSnode);
        } else {
          // TODO: assign to one vg
          ASSERT(0);
        }
      }

      taosArrayPush(taskOneLevel, pTask);
    }
    taosArrayPush(pStream->tasks, taskOneLevel);
  }

  // The parsed plan is only needed while building the tasks; free it on the
  // success path as the error paths already do.
  qDestroyQueryPlan(pPlan);
  return 0;
}
L
Liu Jicong 已提交
185 186

/*
 * Fill a subscription's unassigned-vgroup list from a topic's physical plan.
 *
 * The plan parsed from pTopic->physicalPlan must have exactly one level with
 * exactly one subplan; otherwise terrno is set to
 * TSDB_CODE_MND_UNSUPPORTED_TOPIC. For every vgroup whose dbUid matches the
 * topic's dbUid, the subplan is bound to that vgroup, serialized into a
 * SMqConsumerEp (consumerId -1, status 0) and pushed onto pSub->unassignedVg;
 * pSub->vgNum counts those vgroups.
 *
 * Returns 0 on success, -1 on failure.
 */
int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscribeObj* pSub) {
  SSdb*       pSdb = pMnode->pSdb;
  SVgObj*     pVgroup = NULL;
  SQueryPlan* pPlan = qStringToQueryPlan(pTopic->physicalPlan);
  if (pPlan == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

  ASSERT(pSub->vgNum == 0);

  int32_t levelNum = LIST_LENGTH(pPlan->pSubplans);
  if (levelNum != 1) {
    qDestroyQueryPlan(pPlan);
    terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
    return -1;
  }

  SNodeListNode* inner = nodesListGetNode(pPlan->pSubplans, 0);

  int32_t opNum = LIST_LENGTH(inner->pNodeList);
  if (opNum != 1) {
    qDestroyQueryPlan(pPlan);
    terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
    return -1;
  }
  SSubplan* plan = nodesListGetNode(inner->pNodeList, 0);

  void* pIter = NULL;
  while (1) {
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
    if (pIter == NULL) break;
    if (pVgroup->dbUid != pTopic->dbUid) {
      sdbRelease(pSdb, pVgroup);
      continue;
    }

    pSub->vgNum++;
    plan->execNode.nodeId = pVgroup->vgId;
    plan->execNode.epSet = mndGetVgroupEpset(pMnode, pVgroup);

    SMqConsumerEp consumerEp = {0};
    consumerEp.status = 0;
    consumerEp.consumerId = -1;
    consumerEp.epSet = plan->execNode.epSet;
    consumerEp.vgId = plan->execNode.nodeId;
    int32_t msgLen;
    if (qSubPlanToString(plan, &consumerEp.qmsg, &msgLen) < 0) {
      // NOTE(review): the sdb iterator is abandoned on this early return;
      // check whether the sdb API requires an explicit cancel.
      sdbRelease(pSdb, pVgroup);
      qDestroyQueryPlan(pPlan);
      terrno = TSDB_CODE_QRY_INVALID_INPUT;
      return -1;
    }
    taosArrayPush(pSub->unassignedVg, &consumerEp);

    // Everything needed was copied out of the vgroup; drop the reference
    // taken by sdbFetch, just like the skip and error paths do.
    sdbRelease(pSdb, pVgroup);
  }

  qDestroyQueryPlan(pPlan);

  return 0;
}