mndScheduler.c 7.4 KB
Newer Older
L
Liu Jicong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "mndScheduler.h"
#include "mndConsumer.h"
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
#include "mndOffset.h"
#include "mndShow.h"
L
Liu Jicong 已提交
23
#include "mndSnode.h"
L
Liu Jicong 已提交
24
#include "mndStb.h"
L
Liu Jicong 已提交
25
#include "mndStream.h"
L
Liu Jicong 已提交
26 27 28 29 30 31 32
#include "mndSubscribe.h"
#include "mndTopic.h"
#include "mndTrans.h"
#include "mndUser.h"
#include "mndVgroup.h"
#include "tcompare.h"
#include "tname.h"
L
Liu Jicong 已提交
33 34
#include "tuuid.h"

L
Liu Jicong 已提交
35 36
extern bool tsStreamSchedV;

L
Liu Jicong 已提交
37
/*
 * Serialize a stream task into a deploy message and append it to the
 * transaction as a redo action addressed to pEpSet.
 *
 * The message is an SMsgHead followed by the encoded task. The task is
 * encoded twice: first into a NULL buffer to measure the payload size, then
 * into the body of the allocated message.
 *
 * pTrans  transaction that receives the redo action
 * pTask   task to encode; its taskId is written into the message head
 * pEpSet  endpoint set copied into the action
 * type    message type stored in the action
 *
 * Returns 0 on success. Returns -1 when the message buffer cannot be
 * allocated (terrno = TSDB_CODE_OUT_OF_MEMORY) or when the redo action cannot
 * be appended; in the latter case the buffer is freed here.
 */
int32_t mndPersistTaskDeployReq(STrans* pTrans, SStreamTask* pTask, const SEpSet* pEpSet, tmsg_t type) {
  SCoder encoder;

  // Pass 1: no output buffer, only used to learn how many bytes the task needs.
  tCoderInit(&encoder, TD_LITTLE_ENDIAN, NULL, 0, TD_ENCODER);
  tEncodeSStreamTask(&encoder, pTask);
  int32_t size = encoder.pos;
  int32_t tlen = sizeof(SMsgHead) + size;
  tCoderClear(&encoder);

  void* buf = malloc(tlen);
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  ((SMsgHead*)buf)->streamTaskId = pTask->taskId;

  // Pass 2: encode right after the head. Only `size` bytes are available
  // there, so the encoder must not be told it owns the whole `tlen` bytes.
  void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tCoderInit(&encoder, TD_LITTLE_ENDIAN, abuf, size, TD_ENCODER);
  tEncodeSStreamTask(&encoder, pTask);
  tCoderClear(&encoder);

  STransAction action = {0};
  memcpy(&action.epSet, pEpSet, sizeof(SEpSet));
  action.pCont = buf;
  action.contLen = tlen;
  action.msgType = type;
  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    // buf was obtained from malloc(), so it has to be released with free();
    // handing it to the rpc deallocator would free a pointer it never owned.
    free(buf);
    return -1;
  }
  return 0;
}

/*
 * Bind a subplan to a vgroup and queue the task's deploy request for it.
 *
 * The subplan's exec node is pointed at the vgroup (id and endpoint set), the
 * subplan is serialized into pTask->qmsg, and a TDMT_VND_TASK_DEPLOY request
 * is appended to the transaction.
 *
 * Returns 0 on success. Returns -1 if the subplan cannot be serialized
 * (terrno = TSDB_CODE_QRY_INVALID_INPUT) or if the deploy request cannot be
 * persisted into the transaction.
 */
int32_t mndAssignTaskToVg(SMnode* pMnode, STrans* pTrans, SStreamTask* pTask, SSubplan* plan, const SVgObj* pVgroup) {
  int32_t msgLen;
  plan->execNode.nodeId = pVgroup->vgId;
  plan->execNode.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  if (qSubPlanToString(plan, &pTask->qmsg, &msgLen) < 0) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

  // Propagate a failed persist instead of reporting success: callers only
  // roll back when this function returns a negative value.
  if (mndPersistTaskDeployReq(pTrans, pTask, &plan->execNode.epSet, TDMT_VND_TASK_DEPLOY) < 0) {
    return -1;
  }
  return 0;
}

L
Liu Jicong 已提交
79 80 81 82 83 84
/*
 * Fetch the first snode object from the sdb, or NULL when none is found.
 *
 * sdbFetch() hands the object back through its last argument and returns the
 * iterator, so the two must be kept in separate variables; storing the return
 * value in pObj would replace the object pointer with the iterator.
 *
 * The caller releases the returned object with sdbRelease().
 * NOTE(review): the iterator is abandoned after a single step; confirm
 * whether the sdb layer expects an explicit cancel of an unfinished fetch.
 */
SSnodeObj* mndSchedFetchSnode(SMnode* pMnode) {
  SSnodeObj* pObj = NULL;
  void*      pIter = NULL;

  pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void**)&pObj);
  if (pIter == NULL) {
    return NULL;
  }
  return pObj;
}

L
Liu Jicong 已提交
85 86
/*
 * Bind a subplan to an snode and queue the task's deploy request for it.
 *
 * The subplan's exec node is pointed at the snode (id and endpoint set), the
 * subplan is serialized into pTask->qmsg, and a TDMT_SND_TASK_DEPLOY request
 * is appended to the transaction.
 *
 * Returns 0 on success. Returns -1 if the subplan cannot be serialized
 * (terrno = TSDB_CODE_QRY_INVALID_INPUT) or if the deploy request cannot be
 * persisted into the transaction.
 */
int32_t mndAssignTaskToSnode(SMnode* pMnode, STrans* pTrans, SStreamTask* pTask, SSubplan* plan,
                             const SSnodeObj* pSnode) {
  int32_t msgLen;
  plan->execNode.nodeId = pSnode->id;
  plan->execNode.epSet = mndAcquireEpFromSnode(pMnode, pSnode);

  if (qSubPlanToString(plan, &pTask->qmsg, &msgLen) < 0) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

  // Propagate a failed persist instead of reporting success: callers only
  // roll back when this function returns a negative value.
  if (mndPersistTaskDeployReq(pTrans, pTask, &plan->execNode.epSet, TDMT_SND_TASK_DEPLOY) < 0) {
    return -1;
  }
  return 0;
}

L
Liu Jicong 已提交
99
/*
 * Turn a stream's physical plan into tasks and queue their deploy requests.
 *
 * The plan is parsed from pStream->physicalPlan. Every plan level must hold
 * exactly one subplan:
 *   - level 0 (asserted to be a scan subplan) gets one task per vgroup whose
 *     dbUid matches the stream's; pStream->vgNum counts those vgroups.
 *   - every later level gets a single task, assigned either to the last
 *     vgroup used by level 0 (when tsStreamSchedV is set) or to an snode.
 * The tasks of each level are collected in an array that is pushed onto
 * pStream->tasks.
 *
 * Returns 0 on success and -1 on failure. The parsed plan is destroyed on
 * every exit path.
 * NOTE(review): arrays and tasks created before a failure are not reclaimed
 * here; confirm who owns pStream->tasks when scheduling fails.
 */
int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) {
  SSdb*       pSdb = pMnode->pSdb;
  SVgObj*     pVgroup = NULL;
  SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan);
  if (pPlan == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }
  ASSERT(pStream->vgNum == 0);

  int32_t totLevel = LIST_LENGTH(pPlan->pSubplans);
  pStream->tasks = taosArrayInit(totLevel, sizeof(SArray));
  int32_t lastUsedVgId = 0;

  for (int32_t level = 0; level < totLevel; level++) {
    SArray*        taskOneLevel = taosArrayInit(0, sizeof(SStreamTask));
    SNodeListNode* inner = nodesListGetNode(pPlan->pSubplans, level);
    int32_t        opNum = LIST_LENGTH(inner->pNodeList);
    ASSERT(opNum == 1);

    // The level holds exactly one subplan, so it is always at index 0.
    // Indexing with `level` would run past the list for every level > 0.
    SSubplan* plan = nodesListGetNode(inner->pNodeList, 0);
    if (level == 0) {
      ASSERT(plan->type == SUBPLAN_TYPE_SCAN);
      void* pIter = NULL;
      while (1) {
        pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
        if (pIter == NULL) break;
        if (pVgroup->dbUid != pStream->dbUid) {
          sdbRelease(pSdb, pVgroup);
          continue;
        }

        lastUsedVgId = pVgroup->vgId;
        pStream->vgNum++;

        // send to vnode
        SStreamTask* pTask = streamTaskNew(pStream->uid, level);
        if (pTask == NULL) {
          terrno = TSDB_CODE_OUT_OF_MEMORY;
          sdbRelease(pSdb, pVgroup);
          goto FAIL;
        }
        pTask->pipeSource = 1;
        pTask->pipeSink = level == totLevel - 1 ? 1 : 0;
        pTask->parallelizable = 1;
        // TODO: set to
        if (mndAssignTaskToVg(pMnode, pTrans, pTask, plan, pVgroup) < 0) {
          sdbRelease(pSdb, pVgroup);
          goto FAIL;
        }
        taosArrayPush(taskOneLevel, pTask);

        // Done with this vgroup: drop the reference taken by sdbFetch(), the
        // same way the skip and error paths above do.
        sdbRelease(pSdb, pVgroup);
      }
    } else {
      SStreamTask* pTask = streamTaskNew(pStream->uid, level);
      if (pTask == NULL) {
        terrno = TSDB_CODE_OUT_OF_MEMORY;
        goto FAIL;
      }
      pTask->pipeSource = 0;
      pTask->pipeSink = level == totLevel - 1 ? 1 : 0;
      pTask->parallelizable = plan->type == SUBPLAN_TYPE_SCAN;
      pTask->nextOpDst = STREAM_NEXT_OP_DST__VND;

      if (tsStreamSchedV) {
        ASSERT(lastUsedVgId != 0);
        SVgObj* pVg = mndAcquireVgroup(pMnode, lastUsedVgId);
        if (pVg == NULL) {
          // The vgroup is gone; mndAssignTaskToVg() would dereference NULL.
          // NOTE(review): presumably mndAcquireVgroup() sets terrno - confirm.
          goto FAIL;
        }
        if (mndAssignTaskToVg(pMnode, pTrans, pTask, plan, pVg) < 0) {
          sdbRelease(pSdb, pVg);
          goto FAIL;
        }
        sdbRelease(pSdb, pVg);
      } else {
        SSnodeObj* pSnode = mndSchedFetchSnode(pMnode);
        if (pSnode != NULL) {
          if (mndAssignTaskToSnode(pMnode, pTrans, pTask, plan, pSnode) < 0) {
            sdbRelease(pSdb, pSnode);
            goto FAIL;
          }
          sdbRelease(pSdb, pSnode);
        } else {
          // TODO: assign to one vg
          ASSERT(0);
        }
      }

      taosArrayPush(taskOneLevel, pTask);
    }
    taosArrayPush(pStream->tasks, taskOneLevel);
  }

  // Each task carries its own serialized subplan in qmsg, so the parsed plan
  // is no longer needed once all levels are scheduled.
  qDestroyQueryPlan(pPlan);
  return 0;

FAIL:
  qDestroyQueryPlan(pPlan);
  return -1;
}
L
Liu Jicong 已提交
184 185

/*
 * Populate a subscription's unassigned-vgroup list from a topic's plan.
 *
 * The topic's physical plan must consist of exactly one level holding exactly
 * one subplan; anything else is rejected with TSDB_CODE_MND_UNSUPPORTED_TOPIC.
 * For every vgroup whose dbUid matches the topic's, the subplan is pointed at
 * that vgroup, serialized, and stored in a consumer endpoint entry
 * (consumerId -1, status 0) that is pushed onto pSub->unassignedVg.
 * pSub->vgNum counts the matching vgroups.
 *
 * Returns 0 on success. Returns -1 if the plan cannot be parsed or a subplan
 * cannot be serialized (terrno = TSDB_CODE_QRY_INVALID_INPUT), or if the plan
 * shape is unsupported. The parsed plan is destroyed before returning.
 */
int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscribeObj* pSub) {
  SSdb*       pSdb = pMnode->pSdb;
  SVgObj*     pVgroup = NULL;
  SQueryPlan* pPlan = qStringToQueryPlan(pTopic->physicalPlan);
  if (pPlan == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_INPUT;
    return -1;
  }

  ASSERT(pSub->vgNum == 0);

  int32_t levelNum = LIST_LENGTH(pPlan->pSubplans);
  if (levelNum != 1) {
    qDestroyQueryPlan(pPlan);
    terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
    return -1;
  }

  SNodeListNode* inner = nodesListGetNode(pPlan->pSubplans, 0);

  int32_t opNum = LIST_LENGTH(inner->pNodeList);
  if (opNum != 1) {
    qDestroyQueryPlan(pPlan);
    terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
    return -1;
  }
  SSubplan* plan = nodesListGetNode(inner->pNodeList, 0);

  void* pIter = NULL;
  while (1) {
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
    if (pIter == NULL) break;
    if (pVgroup->dbUid != pTopic->dbUid) {
      sdbRelease(pSdb, pVgroup);
      continue;
    }

    pSub->vgNum++;
    plan->execNode.nodeId = pVgroup->vgId;
    plan->execNode.epSet = mndGetVgroupEpset(pMnode, pVgroup);

    SMqConsumerEp consumerEp = {0};
    consumerEp.status = 0;
    consumerEp.consumerId = -1;
    consumerEp.epSet = plan->execNode.epSet;
    consumerEp.vgId = plan->execNode.nodeId;
    int32_t msgLen;
    if (qSubPlanToString(plan, &consumerEp.qmsg, &msgLen) < 0) {
      sdbRelease(pSdb, pVgroup);
      qDestroyQueryPlan(pPlan);
      terrno = TSDB_CODE_QRY_INVALID_INPUT;
      return -1;
    }
    taosArrayPush(pSub->unassignedVg, &consumerEp);

    // The entry holds copies of the vgroup id and endpoint set, so the
    // reference taken by sdbFetch() can be dropped here, as on the skip and
    // error paths above.
    sdbRelease(pSdb, pVgroup);
  }

  qDestroyQueryPlan(pPlan);

  return 0;
}