mndSubscribe.c 38.2 KB
Newer Older
L
Liu Jicong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

S
Shengliang Guan 已提交
16
#define _DEFAULT_SOURCE
L
Liu Jicong 已提交
17
#include "mndSubscribe.h"
L
Liu Jicong 已提交
18 19 20 21 22 23 24 25 26 27 28 29 30
#include "mndConsumer.h"
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
#include "mndShow.h"
#include "mndStb.h"
#include "mndTopic.h"
#include "mndTrans.h"
#include "mndUser.h"
#include "mndVgroup.h"
#include "tcompare.h"
#include "tname.h"

L
Liu Jicong 已提交
31
#define MND_SUBSCRIBE_VER_NUMBER   1
L
Liu Jicong 已提交
32 33
#define MND_SUBSCRIBE_RESERVE_SIZE 64

L
Liu Jicong 已提交
34
#define MND_SUBSCRIBE_REBALANCE_CNT 3
L
Liu Jicong 已提交
35

L
Liu Jicong 已提交
36 37 38 39 40 41
enum {
  MQ_SUBSCRIBE_STATUS__ACTIVE = 1,
  MQ_SUBSCRIBE_STATUS__DELETED,
};

static char *mndMakeSubscribeKey(const char *cgroup, const char *topicName);
L
Liu Jicong 已提交
42

L
Liu Jicong 已提交
43 44 45 46 47 48 49 50 51 52
static SSdbRaw *mndSubActionEncode(SMqSubscribeObj *);
static SSdbRow *mndSubActionDecode(SSdbRaw *pRaw);
static int32_t  mndSubActionInsert(SSdb *pSdb, SMqSubscribeObj *);
static int32_t  mndSubActionDelete(SSdb *pSdb, SMqSubscribeObj *);
static int32_t  mndSubActionUpdate(SSdb *pSdb, SMqSubscribeObj *pOldSub, SMqSubscribeObj *pNewSub);

static int32_t mndProcessSubscribeReq(SMnodeMsg *pMsg);
static int32_t mndProcessSubscribeRsp(SMnodeMsg *pMsg);
static int32_t mndProcessSubscribeInternalReq(SMnodeMsg *pMsg);
static int32_t mndProcessSubscribeInternalRsp(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
53
static int32_t mndProcessMqTimerMsg(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
54
static int32_t mndProcessGetSubEpReq(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
55
static int32_t mndProcessDoRebalanceMsg(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
56

S
Shengliang Guan 已提交
57 58
static int32_t mndPersistMqSetConnReq(SMnode *pMnode, STrans *pTrans, const SMqTopicObj *pTopic, const char *cgroup,
                                      const SMqConsumerEp *pConsumerEp);
L
Liu Jicong 已提交
59

L
Liu Jicong 已提交
60 61
static int32_t mndPersistRebalanceMsg(SMnode *pMnode, STrans *pTrans, const SMqConsumerEp *pConsumerEp);

S
Shengliang Guan 已提交
62
static int32_t mndInitUnassignedVg(SMnode *pMnode, const SMqTopicObj *pTopic, SMqSubscribeObj *pSub);
L
Liu Jicong 已提交
63 64 65 66 67 68 69 70 71 72 73

int32_t mndInitSubscribe(SMnode *pMnode) {
  SSdbTable table = {.sdbType = SDB_SUBSCRIBE,
                     .keyType = SDB_KEY_BINARY,
                     .encodeFp = (SdbEncodeFp)mndSubActionEncode,
                     .decodeFp = (SdbDecodeFp)mndSubActionDecode,
                     .insertFp = (SdbInsertFp)mndSubActionInsert,
                     .updateFp = (SdbUpdateFp)mndSubActionUpdate,
                     .deleteFp = (SdbDeleteFp)mndSubActionDelete};

  mndSetMsgHandle(pMnode, TDMT_MND_SUBSCRIBE, mndProcessSubscribeReq);
L
Liu Jicong 已提交
74
  mndSetMsgHandle(pMnode, TDMT_VND_MQ_SET_CONN_RSP, mndProcessSubscribeInternalRsp);
L
Liu Jicong 已提交
75
  mndSetMsgHandle(pMnode, TDMT_VND_MQ_REB_RSP, mndProcessSubscribeInternalRsp);
L
Liu Jicong 已提交
76
  mndSetMsgHandle(pMnode, TDMT_MND_MQ_TIMER, mndProcessMqTimerMsg);
L
Liu Jicong 已提交
77
  mndSetMsgHandle(pMnode, TDMT_MND_GET_SUB_EP, mndProcessGetSubEpReq);
L
Liu Jicong 已提交
78
  mndSetMsgHandle(pMnode, TDMT_MND_MQ_DO_REBALANCE, mndProcessDoRebalanceMsg);
L
Liu Jicong 已提交
79 80 81
  return sdbSetTable(pMnode->pSdb, table);
}

L
Liu Jicong 已提交
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
static SMqSubscribeObj *mndCreateSubscription(SMnode *pMnode, const SMqTopicObj *pTopic, const char *consumerGroup) {
  SMqSubscribeObj *pSub = tNewSubscribeObj();
  if (pSub == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }
  char *key = mndMakeSubscribeKey(consumerGroup, pTopic->name);
  if (key == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tDeleteSMqSubscribeObj(pSub);
    free(pSub);
    return NULL;
  }
  strcpy(pSub->key, key);
  free(key);

  if (mndInitUnassignedVg(pMnode, pTopic, pSub) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tDeleteSMqSubscribeObj(pSub);
    free(pSub);
    return NULL;
  }
  // TODO: disable alter subscribed table
  return pSub;
}

L
Liu Jicong 已提交
108
static int32_t mndBuildRebalanceMsg(void **pBuf, int32_t *pLen, const SMqConsumerEp *pConsumerEp) {
L
Liu Jicong 已提交
109
  SMqMVRebReq req = {
L
Liu Jicong 已提交
110 111 112
      .vgId = pConsumerEp->vgId,
      .oldConsumerId = pConsumerEp->oldConsumerId,
      .newConsumerId = pConsumerEp->consumerId,
L
Liu Jicong 已提交
113 114
  };

L
Liu Jicong 已提交
115
  int32_t tlen = tEncodeSMqMVRebReq(NULL, &req);
L
Liu Jicong 已提交
116 117 118 119 120 121 122 123 124
  void   *buf = malloc(sizeof(SMsgHead) + tlen);
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  SMsgHead *pMsgHead = (SMsgHead *)buf;

  pMsgHead->contLen = htonl(sizeof(SMsgHead) + tlen);
  pMsgHead->vgId = htonl(pConsumerEp->vgId);
L
Liu Jicong 已提交
125

L
Liu Jicong 已提交
126
  void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
L
Liu Jicong 已提交
127
  tEncodeSMqMVRebReq(&abuf, &req);
L
Liu Jicong 已提交
128

L
Liu Jicong 已提交
129 130 131 132 133 134
  *pBuf = buf;
  *pLen = tlen;

  return 0;
}

L
Liu Jicong 已提交
135
static int32_t mndPersistRebalanceMsg(SMnode *pMnode, STrans *pTrans, const SMqConsumerEp *pConsumerEp) {
136
  ASSERT(pConsumerEp->oldConsumerId != -1);
L
Liu Jicong 已提交
137 138 139 140 141
  int32_t vgId = pConsumerEp->vgId;
  SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId);

  void   *buf;
  int32_t tlen;
L
Liu Jicong 已提交
142
  if (mndBuildRebalanceMsg(&buf, &tlen, pConsumerEp) < 0) {
L
Liu Jicong 已提交
143 144 145 146 147 148 149
    return -1;
  }

  STransAction action = {0};
  action.epSet = mndGetVgroupEpset(pMnode, pVgObj);
  action.pCont = buf;
  action.contLen = sizeof(SMsgHead) + tlen;
150
  action.msgType = TDMT_VND_MQ_REB;
L
Liu Jicong 已提交
151 152 153 154 155 156 157 158 159 160 161 162

  mndReleaseVgroup(pMnode, pVgObj);
  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    free(buf);
    return -1;
  }

  return 0;
}

static int32_t mndBuildCancelConnReq(void **pBuf, int32_t *pLen, const SMqConsumerEp *pConsumerEp) {
  SMqSetCVgReq req = {0};
L
Liu Jicong 已提交
163
  req.consumerId = pConsumerEp->consumerId;
L
Liu Jicong 已提交
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206

  int32_t tlen = tEncodeSMqSetCVgReq(NULL, &req);
  void   *buf = malloc(sizeof(SMsgHead) + tlen);
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  SMsgHead *pMsgHead = (SMsgHead *)buf;

  pMsgHead->contLen = htonl(sizeof(SMsgHead) + tlen);
  pMsgHead->vgId = htonl(pConsumerEp->vgId);
  void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tEncodeSMqSetCVgReq(&abuf, &req);
  *pBuf = buf;
  *pLen = tlen;
  return 0;
}

static int32_t mndPersistCancelConnReq(SMnode *pMnode, STrans *pTrans, const SMqConsumerEp *pConsumerEp) {
  int32_t vgId = pConsumerEp->vgId;
  SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId);

  void   *buf;
  int32_t tlen;
  if (mndBuildCancelConnReq(&buf, &tlen, pConsumerEp) < 0) {
    return -1;
  }

  STransAction action = {0};
  action.epSet = mndGetVgroupEpset(pMnode, pVgObj);
  action.pCont = buf;
  action.contLen = sizeof(SMsgHead) + tlen;
  action.msgType = TDMT_VND_MQ_SET_CONN;

  mndReleaseVgroup(pMnode, pVgObj);
  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    free(buf);
    return -1;
  }

  return 0;
}

L
Liu Jicong 已提交
207 208
static int32_t mndProcessGetSubEpReq(SMnodeMsg *pMsg) {
  SMnode           *pMnode = pMsg->pMnode;
L
Liu Jicong 已提交
209
  SMqCMGetSubEpReq *pReq = (SMqCMGetSubEpReq *)pMsg->rpcMsg.pCont;
L
Liu Jicong 已提交
210
  SMqCMGetSubEpRsp  rsp = {0};
L
Liu Jicong 已提交
211
  int64_t           consumerId = be64toh(pReq->consumerId);
L
Liu Jicong 已提交
212
  int32_t           epoch = ntohl(pReq->epoch);
L
Liu Jicong 已提交
213 214 215

  SMqConsumerObj *pConsumer = mndAcquireConsumer(pMsg->pMnode, consumerId);
  if (pConsumer == NULL) {
L
Liu Jicong 已提交
216
    terrno = TSDB_CODE_MND_CONSUMER_NOT_EXIST;
L
Liu Jicong 已提交
217 218 219 220
    return -1;
  }
  ASSERT(strcmp(pReq->cgroup, pConsumer->cgroup) == 0);

L
Liu Jicong 已提交
221
  // TODO
L
Liu Jicong 已提交
222
  int32_t hbStatus = atomic_load_32(&pConsumer->hbStatus);
223
  mTrace("try to get sub ep, old val: %d", hbStatus);
L
Liu Jicong 已提交
224 225 226 227 228
  atomic_store_32(&pConsumer->hbStatus, 0);
  /*SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pConsumer);*/
  /*sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);*/
  /*sdbWrite(pMnode->pSdb, pConsumerRaw);*/

L
Liu Jicong 已提交
229 230
  strcpy(rsp.cgroup, pReq->cgroup);
  rsp.consumerId = consumerId;
L
Liu Jicong 已提交
231
  rsp.epoch = pConsumer->epoch;
L
Liu Jicong 已提交
232
  if (epoch != rsp.epoch) {
L
Liu Jicong 已提交
233
    mInfo("send new assignment to consumer, consumer epoch %d, server epoch %d", epoch, rsp.epoch);
L
Liu Jicong 已提交
234
    SArray *pTopics = pConsumer->currentTopics;
S
Shengliang Guan 已提交
235
    int32_t sz = taosArrayGetSize(pTopics);
L
Liu Jicong 已提交
236
    rsp.topics = taosArrayInit(sz, sizeof(SMqSubTopicEp));
S
Shengliang Guan 已提交
237
    for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
238 239 240
      char            *topicName = taosArrayGetP(pTopics, i);
      SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, topicName);
      ASSERT(pSub);
S
Shengliang Guan 已提交
241
      int32_t csz = taosArrayGetSize(pSub->consumers);
L
Liu Jicong 已提交
242
      // TODO: change to bsearch
S
Shengliang Guan 已提交
243
      for (int32_t j = 0; j < csz; j++) {
L
Liu Jicong 已提交
244 245
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, j);
        if (consumerId == pSubConsumer->consumerId) {
S
Shengliang Guan 已提交
246
          int32_t       vgsz = taosArrayGetSize(pSubConsumer->vgInfo);
L
Liu Jicong 已提交
247
          SMqSubTopicEp topicEp;
L
Liu Jicong 已提交
248
          strcpy(topicEp.topic, topicName);
L
Liu Jicong 已提交
249
          topicEp.vgs = taosArrayInit(vgsz, sizeof(SMqSubVgEp));
S
Shengliang Guan 已提交
250
          for (int32_t k = 0; k < vgsz; k++) {
L
Liu Jicong 已提交
251 252 253 254 255 256 257 258
            SMqConsumerEp *pConsumerEp = taosArrayGet(pSubConsumer->vgInfo, k);

            SMqSubVgEp vgEp = {.epSet = pConsumerEp->epSet, .vgId = pConsumerEp->vgId};
            taosArrayPush(topicEp.vgs, &vgEp);
          }
          taosArrayPush(rsp.topics, &topicEp);
          break;
        }
L
Liu Jicong 已提交
259
      }
L
Liu Jicong 已提交
260
      mndReleaseSubscribe(pMnode, pSub);
L
Liu Jicong 已提交
261 262 263
    }
  }
  int32_t tlen = tEncodeSMqCMGetSubEpRsp(NULL, &rsp);
L
Liu Jicong 已提交
264
  void   *buf = rpcMallocCont(tlen);
L
Liu Jicong 已提交
265 266 267 268 269 270
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  void *abuf = buf;
  tEncodeSMqCMGetSubEpRsp(&abuf, &rsp);
L
Liu Jicong 已提交
271
  tDeleteSMqCMGetSubEpRsp(&rsp);
L
Liu Jicong 已提交
272
  mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
273 274 275 276 277
  pMsg->pCont = buf;
  pMsg->contLen = tlen;
  return 0;
}

L
Liu Jicong 已提交
278
static int32_t mndSplitSubscribeKey(char *key, char **topic, char **cgroup) {
S
Shengliang Guan 已提交
279
  int32_t i = 0;
L
Liu Jicong 已提交
280 281 282 283
  while (key[i] != ':') {
    i++;
  }
  key[i] = 0;
L
Liu Jicong 已提交
284
  *cgroup = strdup(key);
L
Liu Jicong 已提交
285
  key[i] = ':';
L
Liu Jicong 已提交
286
  *topic = strdup(&key[i + 1]);
L
Liu Jicong 已提交
287 288 289
  return 0;
}

L
Liu Jicong 已提交
290 291 292 293 294 295 296 297 298 299 300 301 302
static SMqRebSubscribe *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) {
  SMqRebSubscribe *pRebSub = taosHashGet(pHash, key, strlen(key));
  if (pRebSub == NULL) {
    pRebSub = tNewSMqRebSubscribe(key);
    if (pRebSub == NULL) {
      // TODO
      return NULL;
    }
    taosHashPut(pHash, key, strlen(key), pRebSub, sizeof(SMqRebSubscribe));
  }
  return pRebSub;
}

L
Liu Jicong 已提交
303
static int32_t mndProcessMqTimerMsg(SMnodeMsg *pMsg) {
L
Liu Jicong 已提交
304 305 306 307 308 309 310
  SMnode            *pMnode = pMsg->pMnode;
  SSdb              *pSdb = pMnode->pSdb;
  SMqConsumerObj    *pConsumer;
  void              *pIter = NULL;
  SMqDoRebalanceMsg *pRebMsg = rpcMallocCont(sizeof(SMqDoRebalanceMsg));
  pRebMsg->rebSubHash = taosHashInit(64, MurmurHash3_32, true, HASH_NO_LOCK);

L
Liu Jicong 已提交
311 312 313
  while (1) {
    pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer);
    if (pIter == NULL) break;
314
    int32_t hbStatus = atomic_add_fetch_32(&pConsumer->hbStatus, 1);
L
Liu Jicong 已提交
315 316 317 318
    if (hbStatus > MND_SUBSCRIBE_REBALANCE_CNT) {
      int32_t old =
          atomic_val_compare_exchange_32(&pConsumer->status, MQ_CONSUMER_STATUS__ACTIVE, MQ_CONSUMER_STATUS__LOST);
      if (old == MQ_CONSUMER_STATUS__ACTIVE) {
L
Liu Jicong 已提交
319
        // get all topics of that topic
S
Shengliang Guan 已提交
320 321
        int32_t sz = taosArrayGetSize(pConsumer->currentTopics);
        for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
          char            *topic = taosArrayGetP(pConsumer->currentTopics, i);
          char            *key = mndMakeSubscribeKey(pConsumer->cgroup, topic);
          SMqRebSubscribe *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
          taosArrayPush(pRebSub->lostConsumers, &pConsumer->consumerId);
        }
      }
    }
    int32_t status = atomic_load_32(&pConsumer->status);
    if (status == MQ_CONSUMER_STATUS__INIT || status == MQ_CONSUMER_STATUS__MODIFY) {
      SArray *rebSubs;
      if (status == MQ_CONSUMER_STATUS__INIT) {
        rebSubs = pConsumer->currentTopics;
      } else {
        rebSubs = pConsumer->recentRemovedTopics;
      }
S
Shengliang Guan 已提交
337 338
      int32_t sz = taosArrayGetSize(rebSubs);
      for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
339 340 341 342 343 344 345 346
        char            *topic = taosArrayGetP(rebSubs, i);
        char            *key = mndMakeSubscribeKey(pConsumer->cgroup, topic);
        SMqRebSubscribe *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
        if (status == MQ_CONSUMER_STATUS__INIT) {
          taosArrayPush(pRebSub->newConsumers, &pConsumer->consumerId);
        } else if (status == MQ_CONSUMER_STATUS__MODIFY) {
          taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
        }
L
Liu Jicong 已提交
347 348 349
      }
    }
  }
L
Liu Jicong 已提交
350 351 352 353 354 355 356 357
  if (taosHashGetSize(pRebMsg->rebSubHash) != 0) {
    mInfo("mq rebalance will be triggered");
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_MQ_DO_REBALANCE, .pCont = pRebMsg, .contLen = sizeof(SMqDoRebalanceMsg)};
    pMnode->putReqToMWriteQFp(pMnode->pDnode, &rpcMsg);
  } else {
    taosHashCleanup(pRebMsg->rebSubHash);
    rpcFreeCont(pRebMsg);
  }
L
Liu Jicong 已提交
358 359 360 361 362 363 364
  return 0;
}

static int32_t mndProcessDoRebalanceMsg(SMnodeMsg *pMsg) {
  SMnode            *pMnode = pMsg->pMnode;
  SMqDoRebalanceMsg *pReq = (SMqDoRebalanceMsg *)pMsg->rpcMsg.pCont;
  STrans            *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pMsg->rpcMsg);
L
Liu Jicong 已提交
365 366 367 368 369 370 371 372 373 374 375 376 377
  void              *pIter = NULL;

  mInfo("mq rebalance start");

  while (1) {
    pIter = taosHashIterate(pReq->rebSubHash, pIter);
    if (pIter == NULL) break;
    SMqRebSubscribe *pRebSub = (SMqRebSubscribe *)pIter;
    SMqSubscribeObj *pSub = mndAcquireSubscribeByKey(pMnode, pRebSub->key);

    mInfo("mq rebalance subscription: %s", pSub->key);

    // remove lost consumer
S
Shengliang Guan 已提交
378
    for (int32_t i = 0; i < taosArrayGetSize(pRebSub->lostConsumers); i++) {
L
Liu Jicong 已提交
379 380 381 382
      int64_t lostConsumerId = *(int64_t *)taosArrayGet(pRebSub->lostConsumers, i);

      mInfo("mq remove lost consumer %ld", lostConsumerId);

S
Shengliang Guan 已提交
383
      for (int32_t j = 0; j < taosArrayGetSize(pSub->consumers); j++) {
L
Liu Jicong 已提交
384 385 386 387
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, j);
        if (pSubConsumer->consumerId == lostConsumerId) {
          taosArrayAddAll(pSub->unassignedVg, pSubConsumer->vgInfo);
          taosArrayPush(pSub->lostConsumers, pSubConsumer);
L
Liu Jicong 已提交
388 389 390 391 392 393 394 395
          taosArrayRemove(pSub->consumers, j);
          break;
        }
      }
    }

    // calculate rebalance
    int32_t consumerNum = taosArrayGetSize(pSub->consumers);
L
Liu Jicong 已提交
396 397 398
    if (consumerNum != 0) {
      int32_t vgNum = pSub->vgNum;
      int32_t vgEachConsumer = vgNum / consumerNum;
L
Liu Jicong 已提交
399 400 401 402
      int32_t imbalanceVg = vgNum % consumerNum;
      int32_t imbalanceSolved = 0;

      // iterate all consumers, set unassignedVgStash
S
Shengliang Guan 已提交
403
      for (int32_t i = 0; i < consumerNum; i++) {
L
Liu Jicong 已提交
404
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, i);
S
Shengliang Guan 已提交
405 406
        int32_t         vgThisConsumerBeforeRb = taosArrayGetSize(pSubConsumer->vgInfo);
        int32_t         vgThisConsumerAfterRb;
L
Liu Jicong 已提交
407 408 409 410
        if (i < imbalanceVg)
          vgThisConsumerAfterRb = vgEachConsumer + 1;
        else
          vgThisConsumerAfterRb = vgEachConsumer;
L
Liu Jicong 已提交
411

L
Liu Jicong 已提交
412 413
        mInfo("mq consumer:%ld, connectted vgroup number change from %d to %d", pSubConsumer->consumerId,
              vgThisConsumerBeforeRb, vgThisConsumerAfterRb);
L
Liu Jicong 已提交
414

L
Liu Jicong 已提交
415
        while (taosArrayGetSize(pSubConsumer->vgInfo) > vgThisConsumerAfterRb) {
L
Liu Jicong 已提交
416 417 418
          SMqConsumerEp *pConsumerEp = taosArrayPop(pSubConsumer->vgInfo);
          ASSERT(pConsumerEp != NULL);
          ASSERT(pConsumerEp->consumerId == pSubConsumer->consumerId);
L
Liu Jicong 已提交
419
          taosArrayPush(pSub->unassignedVg, pConsumerEp);
L
Liu Jicong 已提交
420 421
        }

L
Liu Jicong 已提交
422 423 424 425 426
        SMqConsumerObj *pRebConsumer = mndAcquireConsumer(pMnode, pSubConsumer->consumerId);
        int32_t         status = atomic_load_32(&pRebConsumer->status);
        if (vgThisConsumerAfterRb != vgThisConsumerBeforeRb ||
            (vgThisConsumerAfterRb != 0 && status != MQ_CONSUMER_STATUS__ACTIVE) ||
            (vgThisConsumerAfterRb == 0 && status != MQ_CONSUMER_STATUS__LOST)) {
L
Liu Jicong 已提交
427 428 429
          if (vgThisConsumerAfterRb != vgThisConsumerBeforeRb) {
            pRebConsumer->epoch++;
          }
L
Liu Jicong 已提交
430 431 432 433 434
          if (vgThisConsumerAfterRb != 0) {
            atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__ACTIVE);
          } else {
            atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__IDLE);
          }
L
Liu Jicong 已提交
435

L
Liu Jicong 已提交
436
          mInfo("mq consumer:%ld, status change from %d to %d", pRebConsumer->consumerId, status, pRebConsumer->status);
L
Liu Jicong 已提交
437

L
Liu Jicong 已提交
438 439 440 441 442
          SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pRebConsumer);
          sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
          mndTransAppendRedolog(pTrans, pConsumerRaw);
        }
        mndReleaseConsumer(pMnode, pRebConsumer);
L
Liu Jicong 已提交
443 444
      }

L
Liu Jicong 已提交
445
      // assign to vgroup
L
Liu Jicong 已提交
446
      if (taosArrayGetSize(pSub->unassignedVg) != 0) {
S
Shengliang Guan 已提交
447
        for (int32_t i = 0; i < consumerNum; i++) {
L
Liu Jicong 已提交
448
          SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, i);
S
Shengliang Guan 已提交
449 450
          int32_t         vgThisConsumerBeforeRb = taosArrayGetSize(pSubConsumer->vgInfo);
          int32_t         vgThisConsumerAfterRb;
L
Liu Jicong 已提交
451 452 453 454
          if (i < imbalanceVg)
            vgThisConsumerAfterRb = vgEachConsumer + 1;
          else
            vgThisConsumerAfterRb = vgEachConsumer;
L
Liu Jicong 已提交
455

L
Liu Jicong 已提交
456 457
          while (taosArrayGetSize(pSubConsumer->vgInfo) < vgThisConsumerAfterRb) {
            SMqConsumerEp *pConsumerEp = taosArrayPop(pSub->unassignedVg);
L
Liu Jicong 已提交
458 459 460 461
            ASSERT(pConsumerEp != NULL);

            pConsumerEp->oldConsumerId = pConsumerEp->consumerId;
            pConsumerEp->consumerId = pSubConsumer->consumerId;
L
Liu Jicong 已提交
462
            taosArrayPush(pSubConsumer->vgInfo, pConsumerEp);
L
Liu Jicong 已提交
463

464
            if (pConsumerEp->oldConsumerId == -1) {
L
Liu Jicong 已提交
465 466
              char *topic;
              char *cgroup;
467
              mndSplitSubscribeKey(pSub->key, &topic, &cgroup);
L
Liu Jicong 已提交
468
              SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic);
469

L
Liu Jicong 已提交
470 471
              mInfo("mq set conn: assign vgroup %d of topic %s to consumer %ld", pConsumerEp->vgId, topic,
                    pConsumerEp->consumerId);
472 473 474 475 476 477

              mndPersistMqSetConnReq(pMnode, pTrans, pTopic, cgroup, pConsumerEp);
              mndReleaseTopic(pMnode, pTopic);
              free(topic);
              free(cgroup);
            } else {
L
Liu Jicong 已提交
478 479
              mInfo("mq rebalance: assign vgroup %d, from consumer %ld to consumer %ld", pConsumerEp->vgId,
                    pConsumerEp->oldConsumerId, pConsumerEp->consumerId);
L
Liu Jicong 已提交
480

481 482
              mndPersistRebalanceMsg(pMnode, pTrans, pConsumerEp);
            }
L
Liu Jicong 已提交
483 484 485
          }
        }
      }
L
Liu Jicong 已提交
486
      ASSERT(taosArrayGetSize(pSub->unassignedVg) == 0);
L
Liu Jicong 已提交
487 488 489 490 491 492 493 494 495 496

      // TODO: log rebalance statistics
      SSdbRaw *pSubRaw = mndSubActionEncode(pSub);
      sdbSetRawStatus(pSubRaw, SDB_STATUS_READY);
      mndTransAppendRedolog(pTrans, pSubRaw);
    }
    mndReleaseSubscribe(pMnode, pSub);
  }
  if (mndTransPrepare(pMnode, pTrans) != 0) {
    mError("mq-rebalance-trans:%d, failed to prepare since %s", pTrans->id, terrstr());
L
Liu Jicong 已提交
497
    taosHashCleanup(pReq->rebSubHash);
L
Liu Jicong 已提交
498 499 500 501
    mndTransDrop(pTrans);
    return -1;
  }

L
Liu Jicong 已提交
502
  taosHashCleanup(pReq->rebSubHash);
L
Liu Jicong 已提交
503 504 505 506 507
  mndTransDrop(pTrans);
  return 0;
}

#if 0
L
Liu Jicong 已提交
508 509
      for (int32_t j = 0; j < consumerNum; j++) {
        bool            changed = false;
L
Liu Jicong 已提交
510 511 512 513 514 515 516 517 518
        bool            unfished = false;

        bool            canUseLeft = imbalanceSolved < imbalanceVg;
        bool            mustUseLeft = canUseLeft && (imbalanceVg - imbalanceSolved >= consumerNum - j);
        ASSERT(imbalanceVg - imbalanceSolved <= consumerNum - j);

        int32_t maxVg = vgEachConsumer + canUseLeft;
        int32_t minVg = vgEachConsumer + mustUseLeft;

L
Liu Jicong 已提交
519
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, j);
L
Liu Jicong 已提交
520 521 522 523 524 525 526 527 528
        int32_t         vgThisConsumerBeforeRb = taosArrayGetSize(pSubConsumer->vgInfo);
        int32_t         vgThisConsumerAfterRb;
        if (vgThisConsumerBeforeRb > maxVg) {
          vgThisConsumerAfterRb = maxVg; 
          imbalanceSolved++;
          changed = true;
        } else if (vgThisConsumerBeforeRb < minVg) {
          vgThisConsumerAfterRb = minVg;
          if (mustUseLeft) imbalanceSolved++;
L
Liu Jicong 已提交
529
          changed = true;
L
Liu Jicong 已提交
530 531 532 533 534 535 536
        } else {
          vgThisConsumerAfterRb = vgThisConsumerBeforeRb;
        }

        if (vgThisConsumerBeforeRb > vgThisConsumerAfterRb) {
          while (taosArrayGetSize(pSubConsumer->vgInfo) > vgThisConsumerAfterRb) {
            // put into unassigned
L
Liu Jicong 已提交
537 538
            SMqConsumerEp *pConsumerEp = taosArrayPop(pSubConsumer->vgInfo);
            ASSERT(pConsumerEp != NULL);
L
Liu Jicong 已提交
539
            ASSERT(pConsumerEp->consumerId == pSubConsumer->consumerId);
L
Liu Jicong 已提交
540 541
            taosArrayPush(unassignedVgStash, pConsumerEp);
          }
L
Liu Jicong 已提交
542 543

        } else if (vgThisConsumerBeforeRb < vgThisConsumerAfterRb) {
L
Liu Jicong 已提交
544
          // assign from unassigned
L
Liu Jicong 已提交
545
          while (taosArrayGetSize(pSubConsumer->vgInfo) < vgThisConsumerAfterRb) {
L
Liu Jicong 已提交
546 547
            // if no unassgined, save j
            if (taosArrayGetSize(unassignedVgStash) == 0) {
L
Liu Jicong 已提交
548 549
              taosArrayPush(unassignedConsumerIdx, &j);
              unfished = true;
L
Liu Jicong 已提交
550 551
              break;
            }
L
Liu Jicong 已提交
552
            // assign vg to consumer
L
Liu Jicong 已提交
553 554 555 556 557 558 559 560
            SMqConsumerEp *pConsumerEp = taosArrayPop(unassignedVgStash);
            ASSERT(pConsumerEp != NULL);
            pConsumerEp->oldConsumerId = pConsumerEp->consumerId;
            pConsumerEp->consumerId = pSubConsumer->consumerId;
            taosArrayPush(pSubConsumer->vgInfo, pConsumerEp);
            // build msg and persist into trans
          }
        }
L
Liu Jicong 已提交
561 562

        if (changed && !unfished) {
L
Liu Jicong 已提交
563 564
          SMqConsumerObj *pRebConsumer = mndAcquireConsumer(pMnode, pSubConsumer->consumerId);
          pRebConsumer->epoch++;
L
Liu Jicong 已提交
565 566 567 568 569 570
          if (vgThisConsumerAfterRb != 0) {
            atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__ACTIVE);
          } else {
            atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__IDLE);
          }
          SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pRebConsumer);
L
Liu Jicong 已提交
571
          sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
L
Liu Jicong 已提交
572
          mndTransAppendRedolog(pTrans, pConsumerRaw);
L
Liu Jicong 已提交
573 574
          mndReleaseConsumer(pMnode, pRebConsumer);
          // TODO: save history
L
Liu Jicong 已提交
575 576 577
        }
      }

L
Liu Jicong 已提交
578 579 580
      for (int32_t j = 0; j < taosArrayGetSize(unassignedConsumerIdx); j++) {
        bool            canUseLeft = imbalanceSolved < imbalanceVg;
        int32_t         consumerIdx = *(int32_t *)taosArrayGet(unassignedConsumerIdx, j);
L
Liu Jicong 已提交
581
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, consumerIdx);
L
Liu Jicong 已提交
582 583 584 585 586
        if (canUseLeft) imbalanceSolved++;
        // must use
        int32_t vgThisConsumerAfterRb = taosArrayGetSize(pSubConsumer->vgInfo) + canUseLeft;
        while (taosArrayGetSize(pSubConsumer->vgInfo) < vgEachConsumer + canUseLeft) {
          // assign vg to consumer
L
Liu Jicong 已提交
587 588 589 590 591 592 593
          SMqConsumerEp *pConsumerEp = taosArrayPop(unassignedVgStash);
          ASSERT(pConsumerEp != NULL);
          pConsumerEp->oldConsumerId = pConsumerEp->consumerId;
          pConsumerEp->consumerId = pSubConsumer->consumerId;
          taosArrayPush(pSubConsumer->vgInfo, pConsumerEp);
          // build msg and persist into trans
        }
L
Liu Jicong 已提交
594 595 596 597 598 599 600 601
        SMqConsumerObj *pRebConsumer = mndAcquireConsumer(pMnode, pSubConsumer->consumerId);
        pRebConsumer->epoch++;
        atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__ACTIVE);
        SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pRebConsumer);
        sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
        mndTransAppendRedolog(pTrans, pConsumerRaw);
        mndReleaseConsumer(pMnode, pRebConsumer);
        // TODO: save history
L
Liu Jicong 已提交
602
      }
L
Liu Jicong 已提交
603
#endif
L
Liu Jicong 已提交
604 605 606

#if 0
    //update consumer status for the subscribption
S
Shengliang Guan 已提交
607
    for (int32_t i = 0; i < taosArrayGetSize(pSub->assigned); i++) {
L
Liu Jicong 已提交
608 609
      SMqConsumerEp *pCEp = taosArrayGet(pSub->assigned, i);
      int64_t        consumerId = pCEp->consumerId;
L
Liu Jicong 已提交
610 611 612 613 614
      if (pCEp->status != -1) {
        int32_t consumerHbStatus = atomic_fetch_add_32(&pCEp->consumerHbStatus, 1);
        if (consumerHbStatus < MND_SUBSCRIBE_REBALANCE_CNT) {
          continue;
        }
L
Liu Jicong 已提交
615
        // put consumer into lostConsumer
L
Liu Jicong 已提交
616 617
        SMqConsumerEp* lostConsumer = taosArrayPush(pSub->lostConsumer, pCEp);
        lostConsumer->qmsg = NULL;
L
Liu Jicong 已提交
618
        // put vg into unassigned
L
Liu Jicong 已提交
619 620 621 622 623
        taosArrayPush(pSub->unassignedVg, pCEp);
        // remove from assigned
        // TODO: swap with last one, reduce size and reset i
        taosArrayRemove(pSub->assigned, i);
        // remove from available consumer
S
Shengliang Guan 已提交
624
        for (int32_t j = 0; j < taosArrayGetSize(pSub->availConsumer); j++) {
L
Liu Jicong 已提交
625
          if (*(int64_t *)taosArrayGet(pSub->availConsumer, i) == pCEp->consumerId) {
L
Liu Jicong 已提交
626 627 628 629 630
            taosArrayRemove(pSub->availConsumer, j);
            break;
          }
          // TODO: acquire consumer, set status to unavail
        }
L
Liu Jicong 已提交
631
#if 0
L
Liu Jicong 已提交
632 633
        SMqConsumerObj* pConsumer = mndAcquireConsumer(pMnode, consumerId);
        pConsumer->epoch++;
L
Liu Jicong 已提交
634
        printf("current epoch %ld size %ld", pConsumer->epoch, pConsumer->topics->size);
L
Liu Jicong 已提交
635 636 637 638
        SSdbRaw* pRaw = mndConsumerActionEncode(pConsumer);
        sdbSetRawStatus(pRaw, SDB_STATUS_READY);
        sdbWriteNotFree(pMnode->pSdb, pRaw);
        mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
639
#endif
L
Liu Jicong 已提交
640 641
      }
    }
L
Liu Jicong 已提交
642 643 644 645 646 647 648 649
    // no available consumer, skip rebalance
    if (taosArrayGetSize(pSub->availConsumer) == 0) {
      continue;
    }
    taosArrayGet(pSub->availConsumer, 0);
    // rebalance condition1 : have unassigned vg
    // assign vg to a consumer, trying to find the least assigned one
    if ((sz = taosArrayGetSize(pSub->unassignedVg)) > 0) {
L
Liu Jicong 已提交
650 651 652 653 654 655
      char *topic = NULL;
      char *cgroup = NULL;
      mndSplitSubscribeKey(pSub->key, &topic, &cgroup);

      SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic);
      STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pMsg->rpcMsg);
L
Liu Jicong 已提交
656
      for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
657
        int64_t        consumerId = *(int64_t *)taosArrayGet(pSub->availConsumer, pSub->nextConsumerIdx);
L
Liu Jicong 已提交
658 659
        pSub->nextConsumerIdx = (pSub->nextConsumerIdx + 1) % taosArrayGetSize(pSub->availConsumer);

L
Liu Jicong 已提交
660
        SMqConsumerEp *pCEp = taosArrayPop(pSub->unassignedVg);
L
Liu Jicong 已提交
661
        pCEp->oldConsumerId = pCEp->consumerId;
L
Liu Jicong 已提交
662 663 664
        pCEp->consumerId = consumerId;
        taosArrayPush(pSub->assigned, pCEp);

L
Liu Jicong 已提交
665
        SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, consumerId);
L
Liu Jicong 已提交
666
        pConsumer->epoch++;
L
Liu Jicong 已提交
667 668 669
        SSdbRaw* pConsumerRaw = mndConsumerActionEncode(pConsumer);
        sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
        sdbWrite(pMnode->pSdb, pConsumerRaw);
L
Liu Jicong 已提交
670 671
        mndReleaseConsumer(pMnode, pConsumer);

L
Liu Jicong 已提交
672 673 674
        void* msg;
        int32_t msgLen;
        mndBuildRebalanceMsg(&msg, &msgLen, pTopic, pCEp, cgroup, topic);
L
Liu Jicong 已提交
675 676 677

        // persist msg
        STransAction action = {0};
678
        action.epSet = pCEp->epSet;
L
Liu Jicong 已提交
679 680
        action.pCont = msg;
        action.contLen = sizeof(SMsgHead) + msgLen;
L
Liu Jicong 已提交
681 682 683
        action.msgType = TDMT_VND_MQ_SET_CONN;
        mndTransAppendRedoAction(pTrans, &action);

L
Liu Jicong 已提交
684
        // persist data
L
Liu Jicong 已提交
685
        SSdbRaw *pRaw = mndSubActionEncode(pSub);
L
Liu Jicong 已提交
686
        sdbSetRawStatus(pRaw, SDB_STATUS_READY);
L
Liu Jicong 已提交
687 688
        mndTransAppendRedolog(pTrans, pRaw);
      }
L
Liu Jicong 已提交
689

L
Liu Jicong 已提交
690 691 692
      if (mndTransPrepare(pMnode, pTrans) != 0) {
        mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr());
      }
L
Liu Jicong 已提交
693
      mndReleaseTopic(pMnode, pTopic);
L
Liu Jicong 已提交
694
      mndTransDrop(pTrans);
L
Liu Jicong 已提交
695 696
      tfree(topic);
      tfree(cgroup);
L
Liu Jicong 已提交
697
    }
L
Liu Jicong 已提交
698
    // rebalance condition2 : imbalance assignment
L
Liu Jicong 已提交
699 700 701
  }
  return 0;
}
L
Liu Jicong 已提交
702
#endif
L
Liu Jicong 已提交
703

S
Shengliang Guan 已提交
704
static int32_t mndInitUnassignedVg(SMnode *pMnode, const SMqTopicObj *pTopic, SMqSubscribeObj *pSub) {
L
Liu Jicong 已提交
705 706
  SSdb      *pSdb = pMnode->pSdb;
  SVgObj    *pVgroup = NULL;
L
Liu Jicong 已提交
707
  SQueryDag *pDag = qStringToDag(pTopic->physicalPlan);
L
Liu Jicong 已提交
708
  SArray    *pArray = NULL;
L
Liu Jicong 已提交
709 710
  SArray    *inner = taosArrayGet(pDag->pSubplans, 0);
  SSubplan  *plan = taosArrayGetP(inner, 0);
L
Liu Jicong 已提交
711
  SArray    *unassignedVg = pSub->unassignedVg;
L
Liu Jicong 已提交
712

L
Liu Jicong 已提交
713 714 715 716 717 718
  void *pIter = NULL;
  while (1) {
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;
    if (pVgroup->dbUid != pTopic->dbUid) continue;

L
Liu Jicong 已提交
719
    pSub->vgNum++;
L
Liu Jicong 已提交
720 721 722 723 724 725 726 727
    plan->execNode.nodeId = pVgroup->vgId;
    plan->execNode.epset = mndGetVgroupEpset(pMnode, pVgroup);

    if (schedulerConvertDagToTaskList(pDag, &pArray) < 0) {
      terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
      mError("unsupport topic: %s, sql: %s", pTopic->name, pTopic->sql);
      return -1;
    }
L
Liu Jicong 已提交
728

L
Liu Jicong 已提交
729 730 731
    SMqConsumerEp consumerEp = {0};
    consumerEp.status = 0;
    consumerEp.consumerId = -1;
L
Liu Jicong 已提交
732
    STaskInfo *pTaskInfo = taosArrayGet(pArray, 0);
L
Liu Jicong 已提交
733 734
    consumerEp.epSet = pTaskInfo->addr.epset;
    consumerEp.vgId = pTaskInfo->addr.nodeId;
L
Liu Jicong 已提交
735

L
Liu Jicong 已提交
736 737 738
    ASSERT(consumerEp.vgId == pVgroup->vgId);
    consumerEp.qmsg = strdup(pTaskInfo->msg->msg);
    taosArrayPush(unassignedVg, &consumerEp);
L
Liu Jicong 已提交
739
    // TODO: free taskInfo
L
Liu Jicong 已提交
740
    taosArrayDestroy(pArray);
L
Liu Jicong 已提交
741
  }
742

L
Liu Jicong 已提交
743
  /*qDestroyQueryDag(pDag);*/
744
  return 0;
L
Liu Jicong 已提交
745 746
}

S
Shengliang Guan 已提交
747 748
static int32_t mndPersistMqSetConnReq(SMnode *pMnode, STrans *pTrans, const SMqTopicObj *pTopic, const char *cgroup,
                                      const SMqConsumerEp *pConsumerEp) {
749
  ASSERT(pConsumerEp->oldConsumerId == -1);
L
Liu Jicong 已提交
750 751 752 753 754
  int32_t vgId = pConsumerEp->vgId;
  SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId);

  SMqSetCVgReq req = {
      .vgId = vgId,
L
Liu Jicong 已提交
755
      .consumerId = pConsumerEp->consumerId,
L
Liu Jicong 已提交
756 757 758 759 760 761 762 763 764 765 766 767 768 769
      .sql = pTopic->sql,
      .logicalPlan = pTopic->logicalPlan,
      .physicalPlan = pTopic->physicalPlan,
      .qmsg = pConsumerEp->qmsg,
  };

  strcpy(req.cgroup, cgroup);
  strcpy(req.topicName, pTopic->name);
  int32_t tlen = tEncodeSMqSetCVgReq(NULL, &req);
  void   *buf = malloc(sizeof(SMsgHead) + tlen);
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
L
Liu Jicong 已提交
770

L
Liu Jicong 已提交
771
  SMsgHead *pMsgHead = (SMsgHead *)buf;
L
Liu Jicong 已提交
772

L
Liu Jicong 已提交
773 774
  pMsgHead->contLen = htonl(sizeof(SMsgHead) + tlen);
  pMsgHead->vgId = htonl(vgId);
L
Liu Jicong 已提交
775

L
Liu Jicong 已提交
776 777
  void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tEncodeSMqSetCVgReq(&abuf, &req);
L
Liu Jicong 已提交
778

L
Liu Jicong 已提交
779 780 781 782 783
  STransAction action = {0};
  action.epSet = mndGetVgroupEpset(pMnode, pVgObj);
  action.pCont = buf;
  action.contLen = sizeof(SMsgHead) + tlen;
  action.msgType = TDMT_VND_MQ_SET_CONN;
L
Liu Jicong 已提交
784

L
Liu Jicong 已提交
785 786 787 788
  mndReleaseVgroup(pMnode, pVgObj);
  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    free(buf);
    return -1;
L
Liu Jicong 已提交
789 790 791 792 793 794 795
  }
  return 0;
}

void mndCleanupSubscribe(SMnode *pMnode) {}

static SSdbRaw *mndSubActionEncode(SMqSubscribeObj *pSub) {
L
Liu Jicong 已提交
796
  terrno = TSDB_CODE_OUT_OF_MEMORY;
L
Liu Jicong 已提交
797
  void   *buf = NULL;
L
Liu Jicong 已提交
798
  int32_t tlen = tEncodeSubscribeObj(NULL, pSub);
L
Liu Jicong 已提交
799
  int32_t size = sizeof(int32_t) + tlen + MND_SUBSCRIBE_RESERVE_SIZE;
L
Liu Jicong 已提交
800 801 802 803

  SSdbRaw *pRaw = sdbAllocRaw(SDB_SUBSCRIBE, MND_SUBSCRIBE_VER_NUMBER, size);
  if (pRaw == NULL) goto SUB_ENCODE_OVER;

L
Liu Jicong 已提交
804
  buf = malloc(tlen);
L
Liu Jicong 已提交
805
  if (buf == NULL) goto SUB_ENCODE_OVER;
L
Liu Jicong 已提交
806

L
Liu Jicong 已提交
807 808
  void *abuf = buf;
  tEncodeSubscribeObj(&abuf, pSub);
L
Liu Jicong 已提交
809 810 811 812 813 814 815

  int32_t dataPos = 0;
  SDB_SET_INT32(pRaw, dataPos, tlen, SUB_ENCODE_OVER);
  SDB_SET_BINARY(pRaw, dataPos, buf, tlen, SUB_ENCODE_OVER);
  SDB_SET_RESERVE(pRaw, dataPos, MND_SUBSCRIBE_RESERVE_SIZE, SUB_ENCODE_OVER);
  SDB_SET_DATALEN(pRaw, dataPos, SUB_ENCODE_OVER);

L
Liu Jicong 已提交
816 817
  terrno = TSDB_CODE_SUCCESS;

L
Liu Jicong 已提交
818
SUB_ENCODE_OVER:
L
Liu Jicong 已提交
819
  tfree(buf);
L
Liu Jicong 已提交
820 821 822 823 824 825 826 827 828 829 830 831
  if (terrno != 0) {
    mError("subscribe:%s, failed to encode to raw:%p since %s", pSub->key, pRaw, terrstr());
    sdbFreeRaw(pRaw);
    return NULL;
  }

  mTrace("subscribe:%s, encode to raw:%p, row:%p", pSub->key, pRaw, pSub);
  return pRaw;
}

static SSdbRow *mndSubActionDecode(SSdbRaw *pRaw) {
  terrno = TSDB_CODE_OUT_OF_MEMORY;
L
Liu Jicong 已提交
832
  void *buf = NULL;
L
Liu Jicong 已提交
833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850

  int8_t sver = 0;
  if (sdbGetRawSoftVer(pRaw, &sver) != 0) goto SUB_DECODE_OVER;

  if (sver != MND_SUBSCRIBE_VER_NUMBER) {
    terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
    goto SUB_DECODE_OVER;
  }

  int32_t  size = sizeof(SMqSubscribeObj);
  SSdbRow *pRow = sdbAllocRow(size);
  if (pRow == NULL) goto SUB_DECODE_OVER;

  SMqSubscribeObj *pSub = sdbGetRowObj(pRow);
  if (pSub == NULL) goto SUB_DECODE_OVER;

  int32_t dataPos = 0;
  int32_t tlen;
L
Liu Jicong 已提交
851
  SDB_GET_INT32(pRaw, dataPos, &tlen, SUB_DECODE_OVER);
L
Liu Jicong 已提交
852
  buf = malloc(tlen + 1);
L
Liu Jicong 已提交
853 854 855 856 857 858 859 860
  if (buf == NULL) goto SUB_DECODE_OVER;
  SDB_GET_BINARY(pRaw, dataPos, buf, tlen, SUB_DECODE_OVER);
  SDB_GET_RESERVE(pRaw, dataPos, MND_SUBSCRIBE_RESERVE_SIZE, SUB_DECODE_OVER);

  if (tDecodeSubscribeObj(buf, pSub) == NULL) {
    goto SUB_DECODE_OVER;
  }

L
Liu Jicong 已提交
861 862
  terrno = TSDB_CODE_SUCCESS;

L
Liu Jicong 已提交
863
SUB_DECODE_OVER:
L
Liu Jicong 已提交
864
  tfree(buf);
L
Liu Jicong 已提交
865
  if (terrno != TSDB_CODE_SUCCESS) {
L
Liu Jicong 已提交
866 867 868 869 870 871 872 873 874 875 876 877 878 879 880
    mError("subscribe:%s, failed to decode from raw:%p since %s", pSub->key, pRaw, terrstr());
    tfree(pRow);
    return NULL;
  }

  return pRow;
}

static int32_t mndSubActionInsert(SSdb *pSdb, SMqSubscribeObj *pSub) {
  mTrace("subscribe:%s, perform insert action", pSub->key);
  return 0;
}

static int32_t mndSubActionDelete(SSdb *pSdb, SMqSubscribeObj *pSub) {
  mTrace("subscribe:%s, perform delete action", pSub->key);
L
Liu Jicong 已提交
881
  tDeleteSMqSubscribeObj(pSub);
L
Liu Jicong 已提交
882 883 884 885 886 887 888 889
  return 0;
}

static int32_t mndSubActionUpdate(SSdb *pSdb, SMqSubscribeObj *pOldSub, SMqSubscribeObj *pNewSub) {
  mTrace("subscribe:%s, perform update action", pOldSub->key);
  return 0;
}

L
Liu Jicong 已提交
890
static char *mndMakeSubscribeKey(const char *cgroup, const char *topicName) {
L
Liu Jicong 已提交
891 892 893 894
  char *key = malloc(TSDB_SHOW_SUBQUERY_LEN);
  if (key == NULL) {
    return NULL;
  }
S
Shengliang Guan 已提交
895
  int32_t tlen = strlen(cgroup);
L
Liu Jicong 已提交
896 897 898 899 900 901
  memcpy(key, cgroup, tlen);
  key[tlen] = ':';
  strcpy(key + tlen + 1, topicName);
  return key;
}

L
Liu Jicong 已提交
902
SMqSubscribeObj *mndAcquireSubscribe(SMnode *pMnode, const char *cgroup, const char *topicName) {
L
Liu Jicong 已提交
903 904 905 906 907
  SSdb            *pSdb = pMnode->pSdb;
  char            *key = mndMakeSubscribeKey(cgroup, topicName);
  SMqSubscribeObj *pSub = sdbAcquire(pSdb, SDB_SUBSCRIBE, key);
  free(key);
  if (pSub == NULL) {
908
    terrno = TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST;
L
Liu Jicong 已提交
909 910 911 912
  }
  return pSub;
}

L
Liu Jicong 已提交
913 914 915 916
SMqSubscribeObj *mndAcquireSubscribeByKey(SMnode *pMnode, const char *key) {
  SSdb            *pSdb = pMnode->pSdb;
  SMqSubscribeObj *pSub = sdbAcquire(pSdb, SDB_SUBSCRIBE, key);
  if (pSub == NULL) {
917
    terrno = TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST;
L
Liu Jicong 已提交
918 919 920 921
  }
  return pSub;
}

L
Liu Jicong 已提交
922 923 924 925 926 927 928 929 930 931 932
void mndReleaseSubscribe(SMnode *pMnode, SMqSubscribeObj *pSub) {
  SSdb *pSdb = pMnode->pSdb;
  sdbRelease(pSdb, pSub);
}

static int32_t mndProcessSubscribeReq(SMnodeMsg *pMsg) {
  SMnode         *pMnode = pMsg->pMnode;
  char           *msgStr = pMsg->rpcMsg.pCont;
  SCMSubscribeReq subscribe;
  tDeserializeSCMSubscribeReq(msgStr, &subscribe);
  int64_t consumerId = subscribe.consumerId;
L
Liu Jicong 已提交
933
  char   *cgroup = subscribe.consumerGroup;
L
Liu Jicong 已提交
934 935

  SArray *newSub = subscribe.topicNames;
S
Shengliang Guan 已提交
936
  int32_t newTopicNum = subscribe.topicNum;
L
Liu Jicong 已提交
937 938 939 940

  taosArraySortString(newSub, taosArrayCompareString);

  SArray *oldSub = NULL;
S
Shengliang Guan 已提交
941
  int32_t oldTopicNum = 0;
L
Liu Jicong 已提交
942
  bool    createConsumer = false;
L
Liu Jicong 已提交
943 944 945 946
  // create consumer if not exist
  SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, consumerId);
  if (pConsumer == NULL) {
    // create consumer
L
Liu Jicong 已提交
947 948
    pConsumer = mndCreateConsumer(consumerId, cgroup);
    createConsumer = true;
L
Liu Jicong 已提交
949
  } else {
L
Liu Jicong 已提交
950
    pConsumer->epoch++;
L
Liu Jicong 已提交
951
    oldSub = pConsumer->currentTopics;
L
Liu Jicong 已提交
952
  }
L
Liu Jicong 已提交
953
  pConsumer->currentTopics = newSub;
L
Liu Jicong 已提交
954 955 956 957 958 959 960 961 962 963 964

  if (oldSub != NULL) {
    oldTopicNum = taosArrayGetSize(oldSub);
  }

  STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pMsg->rpcMsg);
  if (pTrans == NULL) {
    // TODO: free memory
    return -1;
  }

S
Shengliang Guan 已提交
965
  int32_t i = 0, j = 0;
L
Liu Jicong 已提交
966
  while (i < newTopicNum || j < oldTopicNum) {
L
Liu Jicong 已提交
967 968
    char *newTopicName = NULL;
    char *oldTopicName = NULL;
L
Liu Jicong 已提交
969 970
    if (i >= newTopicNum) {
      // encode unset topic msg to all vnodes related to that topic
L
Liu Jicong 已提交
971
      oldTopicName = taosArrayGetP(oldSub, j);
L
Liu Jicong 已提交
972 973
      j++;
    } else if (j >= oldTopicNum) {
L
Liu Jicong 已提交
974
      newTopicName = taosArrayGetP(newSub, i);
L
Liu Jicong 已提交
975 976
      i++;
    } else {
L
Liu Jicong 已提交
977
      newTopicName = taosArrayGetP(newSub, i);
L
Liu Jicong 已提交
978
      oldTopicName = taosArrayGetP(oldSub, j);
L
Liu Jicong 已提交
979

S
Shengliang Guan 已提交
980
      int32_t comp = compareLenPrefixedStr(newTopicName, oldTopicName);
L
Liu Jicong 已提交
981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996
      if (comp == 0) {
        // do nothing
        oldTopicName = newTopicName = NULL;
        i++;
        j++;
        continue;
      } else if (comp < 0) {
        oldTopicName = NULL;
        i++;
      } else {
        newTopicName = NULL;
        j++;
      }
    }

    if (oldTopicName != NULL) {
L
Liu Jicong 已提交
997 998 999 1000 1001
      ASSERT(newTopicName == NULL);

      // cancel subscribe of old topic
      SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, cgroup, oldTopicName);
      ASSERT(pSub);
S
Shengliang Guan 已提交
1002 1003
      int32_t csz = taosArrayGetSize(pSub->consumers);
      for (int32_t ci = 0; ci < csz; ci++) {
L
Liu Jicong 已提交
1004 1005
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, ci);
        if (pSubConsumer->consumerId == consumerId) {
S
Shengliang Guan 已提交
1006 1007
          int32_t vgsz = taosArrayGetSize(pSubConsumer->vgInfo);
          for (int32_t vgi = 0; vgi < vgsz; vgi++) {
L
Liu Jicong 已提交
1008 1009
            SMqConsumerEp *pConsumerEp = taosArrayGet(pSubConsumer->vgInfo, vgi);
            mndPersistCancelConnReq(pMnode, pTrans, pConsumerEp);
L
Liu Jicong 已提交
1010
            taosArrayPush(pSub->unassignedVg, pConsumerEp);
L
Liu Jicong 已提交
1011
          }
L
Liu Jicong 已提交
1012
          taosArrayRemove(pSub->consumers, ci);
L
Liu Jicong 已提交
1013
          break;
L
Liu Jicong 已提交
1014 1015
        }
      }
L
Liu Jicong 已提交
1016 1017
      atomic_store_32(&pConsumer->status, MQ_CONSUMER_STATUS__MODIFY);
      /*pSub->status = MQ_SUBSCRIBE_STATUS__DELETED;*/
L
Liu Jicong 已提交
1018 1019 1020 1021 1022
    } else if (newTopicName != NULL) {
      ASSERT(oldTopicName == NULL);

      SMqTopicObj *pTopic = mndAcquireTopic(pMnode, newTopicName);
      if (pTopic == NULL) {
L
Liu Jicong 已提交
1023
        mError("topic being subscribed not exist: %s", newTopicName);
L
Liu Jicong 已提交
1024 1025 1026
        continue;
      }

L
Liu Jicong 已提交
1027 1028
      SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, cgroup, newTopicName);
      bool             createSub = false;
L
Liu Jicong 已提交
1029
      if (pSub == NULL) {
L
Liu Jicong 已提交
1030 1031 1032
        mDebug("create new subscription by consumer %ld, group: %s, topic %s", consumerId, cgroup, newTopicName);
        pSub = mndCreateSubscription(pMnode, pTopic, cgroup);
        createSub = true;
L
Liu Jicong 已提交
1033
      }
L
Liu Jicong 已提交
1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045

      SMqSubConsumer mqSubConsumer;
      mqSubConsumer.consumerId = consumerId;
      mqSubConsumer.vgInfo = taosArrayInit(0, sizeof(SMqConsumerEp));
      taosArrayPush(pSub->consumers, &mqSubConsumer);

      // if have un assigned vg, assign one to the consumer
      if (taosArrayGetSize(pSub->unassignedVg) > 0) {
        SMqConsumerEp *pConsumerEp = taosArrayPop(pSub->unassignedVg);
        pConsumerEp->oldConsumerId = pConsumerEp->consumerId;
        pConsumerEp->consumerId = consumerId;
        taosArrayPush(mqSubConsumer.vgInfo, pConsumerEp);
1046 1047 1048 1049 1050
        if (pConsumerEp->oldConsumerId == -1) {
          mndPersistMqSetConnReq(pMnode, pTrans, pTopic, cgroup, pConsumerEp);
        } else {
          mndPersistRebalanceMsg(pMnode, pTrans, pConsumerEp);
        }
L
Liu Jicong 已提交
1051
        // to trigger rebalance at once, do not set status active
1052
        /*atomic_store_32(&pConsumer->status, MQ_CONSUMER_STATUS__ACTIVE);*/
L
Liu Jicong 已提交
1053
      }
L
Liu Jicong 已提交
1054

L
Liu Jicong 已提交
1055
      SSdbRaw *pRaw = mndSubActionEncode(pSub);
L
Liu Jicong 已提交
1056
      sdbSetRawStatus(pRaw, SDB_STATUS_READY);
L
Liu Jicong 已提交
1057
      mndTransAppendRedolog(pTrans, pRaw);
L
Liu Jicong 已提交
1058

L
Liu Jicong 已提交
1059 1060
      if (!createSub) mndReleaseSubscribe(pMnode, pSub);
      mndReleaseTopic(pMnode, pTopic);
L
Liu Jicong 已提交
1061 1062 1063
    }
  }

L
Liu Jicong 已提交
1064
  if (oldSub) taosArrayDestroyEx(oldSub, free);
L
Liu Jicong 已提交
1065 1066 1067 1068 1069 1070 1071

  // persist consumerObj
  SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pConsumer);
  sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
  mndTransAppendRedolog(pTrans, pConsumerRaw);

  if (mndTransPrepare(pMnode, pTrans) != 0) {
L
Liu Jicong 已提交
1072
    mError("mq-subscribe-trans:%d, failed to prepare since %s", pTrans->id, terrstr());
L
Liu Jicong 已提交
1073
    mndTransDrop(pTrans);
L
Liu Jicong 已提交
1074
    if (!createConsumer) mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
1075 1076 1077 1078
    return -1;
  }

  mndTransDrop(pTrans);
L
Liu Jicong 已提交
1079
  if (!createConsumer) mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
1080
  return TSDB_CODE_MND_ACTION_IN_PROGRESS;
L
Liu Jicong 已提交
1081 1082
}

L
Liu Jicong 已提交
1083 1084 1085 1086
static int32_t mndProcessSubscribeInternalRsp(SMnodeMsg *pRsp) {
  mndTransProcessRsp(pRsp);
  return 0;
}
L
Liu Jicong 已提交
1087 1088 1089 1090 1091

static void mndCancelGetNextConsumer(SMnode *pMnode, void *pIter) {
  SSdb *pSdb = pMnode->pSdb;
  sdbCancelFetch(pSdb, pIter);
}