mndSubscribe.c 40.9 KB
Newer Older
L
Liu Jicong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

S
Shengliang Guan 已提交
16
#define _DEFAULT_SOURCE
L
Liu Jicong 已提交
17
#include "mndSubscribe.h"
L
Liu Jicong 已提交
18 19 20 21
#include "mndConsumer.h"
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
L
Liu Jicong 已提交
22
#include "mndOffset.h"
L
Liu Jicong 已提交
23
#include "mndScheduler.h"
L
Liu Jicong 已提交
24 25 26 27 28 29 30 31 32
#include "mndShow.h"
#include "mndStb.h"
#include "mndTopic.h"
#include "mndTrans.h"
#include "mndUser.h"
#include "mndVgroup.h"
#include "tcompare.h"
#include "tname.h"

L
Liu Jicong 已提交
33
#define MND_SUBSCRIBE_VER_NUMBER   1
L
Liu Jicong 已提交
34 35
#define MND_SUBSCRIBE_RESERVE_SIZE 64

L
Liu Jicong 已提交
36
#define MND_SUBSCRIBE_REBALANCE_CNT 3
L
Liu Jicong 已提交
37

L
Liu Jicong 已提交
38 39 40 41 42 43
enum {
  MQ_SUBSCRIBE_STATUS__ACTIVE = 1,
  MQ_SUBSCRIBE_STATUS__DELETED,
};

static char *mndMakeSubscribeKey(const char *cgroup, const char *topicName);
L
Liu Jicong 已提交
44

L
Liu Jicong 已提交
45 46 47 48 49 50 51 52 53 54
static SSdbRaw *mndSubActionEncode(SMqSubscribeObj *);
static SSdbRow *mndSubActionDecode(SSdbRaw *pRaw);
static int32_t  mndSubActionInsert(SSdb *pSdb, SMqSubscribeObj *);
static int32_t  mndSubActionDelete(SSdb *pSdb, SMqSubscribeObj *);
static int32_t  mndSubActionUpdate(SSdb *pSdb, SMqSubscribeObj *pOldSub, SMqSubscribeObj *pNewSub);

static int32_t mndProcessSubscribeReq(SMnodeMsg *pMsg);
static int32_t mndProcessSubscribeRsp(SMnodeMsg *pMsg);
static int32_t mndProcessSubscribeInternalReq(SMnodeMsg *pMsg);
static int32_t mndProcessSubscribeInternalRsp(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
55
static int32_t mndProcessMqTimerMsg(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
56
static int32_t mndProcessGetSubEpReq(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
57
static int32_t mndProcessDoRebalanceMsg(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
58
static int32_t mndProcessResetOffsetReq(SMnodeMsg *pMsg);
L
Liu Jicong 已提交
59

S
Shengliang Guan 已提交
60 61
static int32_t mndPersistMqSetConnReq(SMnode *pMnode, STrans *pTrans, const SMqTopicObj *pTopic, const char *cgroup,
                                      const SMqConsumerEp *pConsumerEp);
L
Liu Jicong 已提交
62

L
Liu Jicong 已提交
63 64
static int32_t mndPersistRebalanceMsg(SMnode *pMnode, STrans *pTrans, const SMqConsumerEp *pConsumerEp);

S
Shengliang Guan 已提交
65
static int32_t mndInitUnassignedVg(SMnode *pMnode, const SMqTopicObj *pTopic, SMqSubscribeObj *pSub);
L
Liu Jicong 已提交
66 67 68 69 70 71 72 73 74 75 76

int32_t mndInitSubscribe(SMnode *pMnode) {
  SSdbTable table = {.sdbType = SDB_SUBSCRIBE,
                     .keyType = SDB_KEY_BINARY,
                     .encodeFp = (SdbEncodeFp)mndSubActionEncode,
                     .decodeFp = (SdbDecodeFp)mndSubActionDecode,
                     .insertFp = (SdbInsertFp)mndSubActionInsert,
                     .updateFp = (SdbUpdateFp)mndSubActionUpdate,
                     .deleteFp = (SdbDeleteFp)mndSubActionDelete};

  mndSetMsgHandle(pMnode, TDMT_MND_SUBSCRIBE, mndProcessSubscribeReq);
L
Liu Jicong 已提交
77
  mndSetMsgHandle(pMnode, TDMT_VND_MQ_SET_CONN_RSP, mndProcessSubscribeInternalRsp);
L
Liu Jicong 已提交
78
  mndSetMsgHandle(pMnode, TDMT_VND_MQ_REB_RSP, mndProcessSubscribeInternalRsp);
L
Liu Jicong 已提交
79
  mndSetMsgHandle(pMnode, TDMT_MND_MQ_TIMER, mndProcessMqTimerMsg);
L
Liu Jicong 已提交
80
  mndSetMsgHandle(pMnode, TDMT_MND_GET_SUB_EP, mndProcessGetSubEpReq);
L
Liu Jicong 已提交
81
  mndSetMsgHandle(pMnode, TDMT_MND_MQ_DO_REBALANCE, mndProcessDoRebalanceMsg);
L
Liu Jicong 已提交
82 83 84
  return sdbSetTable(pMnode->pSdb, table);
}

L
Liu Jicong 已提交
85
static SMqSubscribeObj *mndCreateSubscription(SMnode *pMnode, const SMqTopicObj *pTopic, const char *cgroup) {
L
Liu Jicong 已提交
86 87 88 89 90
  SMqSubscribeObj *pSub = tNewSubscribeObj();
  if (pSub == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }
L
Liu Jicong 已提交
91
  char *key = mndMakeSubscribeKey(cgroup, pTopic->name);
L
Liu Jicong 已提交
92 93 94 95 96 97 98 99 100
  if (key == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tDeleteSMqSubscribeObj(pSub);
    free(pSub);
    return NULL;
  }
  strcpy(pSub->key, key);
  free(key);

L
Liu Jicong 已提交
101 102 103 104 105 106 107 108
  if (mndSchedInitSubEp(pMnode, pTopic, pSub) < 0) {
    terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
    tDeleteSMqSubscribeObj(pSub);
    free(pSub);
    return NULL;
  }

#if 0
L
Liu Jicong 已提交
109 110 111 112 113 114
  if (mndInitUnassignedVg(pMnode, pTopic, pSub) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tDeleteSMqSubscribeObj(pSub);
    free(pSub);
    return NULL;
  }
L
Liu Jicong 已提交
115
#endif
L
Liu Jicong 已提交
116 117 118 119
  // TODO: disable alter subscribed table
  return pSub;
}

L
Liu Jicong 已提交
120
static int32_t mndBuildRebalanceMsg(void **pBuf, int32_t *pLen, const SMqConsumerEp *pConsumerEp) {
L
Liu Jicong 已提交
121
  SMqMVRebReq req = {
L
Liu Jicong 已提交
122 123 124
      .vgId = pConsumerEp->vgId,
      .oldConsumerId = pConsumerEp->oldConsumerId,
      .newConsumerId = pConsumerEp->consumerId,
L
Liu Jicong 已提交
125 126
  };

L
Liu Jicong 已提交
127
  int32_t tlen = tEncodeSMqMVRebReq(NULL, &req);
L
Liu Jicong 已提交
128 129 130 131 132 133 134 135 136
  void   *buf = malloc(sizeof(SMsgHead) + tlen);
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  SMsgHead *pMsgHead = (SMsgHead *)buf;

  pMsgHead->contLen = htonl(sizeof(SMsgHead) + tlen);
  pMsgHead->vgId = htonl(pConsumerEp->vgId);
L
Liu Jicong 已提交
137

L
Liu Jicong 已提交
138
  void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
L
Liu Jicong 已提交
139
  tEncodeSMqMVRebReq(&abuf, &req);
L
Liu Jicong 已提交
140

L
Liu Jicong 已提交
141 142 143 144 145 146
  *pBuf = buf;
  *pLen = tlen;

  return 0;
}

L
Liu Jicong 已提交
147
static int32_t mndPersistRebalanceMsg(SMnode *pMnode, STrans *pTrans, const SMqConsumerEp *pConsumerEp) {
148
  ASSERT(pConsumerEp->oldConsumerId != -1);
L
Liu Jicong 已提交
149 150 151

  void   *buf;
  int32_t tlen;
L
Liu Jicong 已提交
152
  if (mndBuildRebalanceMsg(&buf, &tlen, pConsumerEp) < 0) {
L
Liu Jicong 已提交
153 154 155
    return -1;
  }

S
Shengliang Guan 已提交
156 157 158
  int32_t vgId = pConsumerEp->vgId;
  SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId);

L
Liu Jicong 已提交
159 160 161 162
  STransAction action = {0};
  action.epSet = mndGetVgroupEpset(pMnode, pVgObj);
  action.pCont = buf;
  action.contLen = sizeof(SMsgHead) + tlen;
163
  action.msgType = TDMT_VND_MQ_REB;
L
Liu Jicong 已提交
164 165 166 167 168 169 170 171 172 173 174 175

  mndReleaseVgroup(pMnode, pVgObj);
  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    free(buf);
    return -1;
  }

  return 0;
}

static int32_t mndBuildCancelConnReq(void **pBuf, int32_t *pLen, const SMqConsumerEp *pConsumerEp) {
  SMqSetCVgReq req = {0};
L
Liu Jicong 已提交
176
  req.consumerId = pConsumerEp->consumerId;
L
Liu Jicong 已提交
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201

  int32_t tlen = tEncodeSMqSetCVgReq(NULL, &req);
  void   *buf = malloc(sizeof(SMsgHead) + tlen);
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  SMsgHead *pMsgHead = (SMsgHead *)buf;

  pMsgHead->contLen = htonl(sizeof(SMsgHead) + tlen);
  pMsgHead->vgId = htonl(pConsumerEp->vgId);
  void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tEncodeSMqSetCVgReq(&abuf, &req);
  *pBuf = buf;
  *pLen = tlen;
  return 0;
}

static int32_t mndPersistCancelConnReq(SMnode *pMnode, STrans *pTrans, const SMqConsumerEp *pConsumerEp) {
  void   *buf;
  int32_t tlen;
  if (mndBuildCancelConnReq(&buf, &tlen, pConsumerEp) < 0) {
    return -1;
  }

S
Shengliang Guan 已提交
202 203 204
  int32_t vgId = pConsumerEp->vgId;
  SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId);

L
Liu Jicong 已提交
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
  STransAction action = {0};
  action.epSet = mndGetVgroupEpset(pMnode, pVgObj);
  action.pCont = buf;
  action.contLen = sizeof(SMsgHead) + tlen;
  action.msgType = TDMT_VND_MQ_SET_CONN;

  mndReleaseVgroup(pMnode, pVgObj);
  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    free(buf);
    return -1;
  }

  return 0;
}

L
Liu Jicong 已提交
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
#if 0
static int32_t mndProcessResetOffsetReq(SMnodeMsg *pMsg) {
  SMnode             *pMnode = pMsg->pMnode;
  uint8_t            *str = pMsg->rpcMsg.pCont;
  SMqCMResetOffsetReq req;

  SCoder decoder;
  tCoderInit(&decoder, TD_LITTLE_ENDIAN, str, pMsg->rpcMsg.contLen, TD_DECODER);
  tDecodeSMqCMResetOffsetReq(&decoder, &req);

  SHashObj *pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  if (pHash == NULL) {
    return -1;
  }

  for (int32_t i = 0; i < req.num; i++) {
    SMqOffset    *pOffset = &req.offsets[i];
    SMqVgOffsets *pVgOffset = taosHashGet(pHash, &pOffset->vgId, sizeof(int32_t));
    if (pVgOffset == NULL) {
      pVgOffset = malloc(sizeof(SMqVgOffsets));
      if (pVgOffset == NULL) {
        return -1;
      }
      pVgOffset->offsets = taosArrayInit(0, sizeof(void *));
      taosArrayPush(pVgOffset->offsets, &pOffset);
    }
    taosHashPut(pHash, &pOffset->vgId, sizeof(int32_t), &pVgOffset, sizeof(void *));
  }

  STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, &pMsg->rpcMsg);
  if (pTrans == NULL) {
    mError("mq-reset-offset: failed since %s", terrstr());
    return -1;
  }

  return 0;
}
#endif

L
Liu Jicong 已提交
259 260
static int32_t mndProcessGetSubEpReq(SMnodeMsg *pMsg) {
  SMnode           *pMnode = pMsg->pMnode;
L
Liu Jicong 已提交
261
  SMqCMGetSubEpReq *pReq = (SMqCMGetSubEpReq *)pMsg->rpcMsg.pCont;
L
Liu Jicong 已提交
262
  SMqCMGetSubEpRsp  rsp = {0};
L
Liu Jicong 已提交
263
  int64_t           consumerId = be64toh(pReq->consumerId);
L
Liu Jicong 已提交
264
  int32_t           epoch = ntohl(pReq->epoch);
L
Liu Jicong 已提交
265 266 267

  SMqConsumerObj *pConsumer = mndAcquireConsumer(pMsg->pMnode, consumerId);
  if (pConsumer == NULL) {
L
Liu Jicong 已提交
268
    terrno = TSDB_CODE_MND_CONSUMER_NOT_EXIST;
L
Liu Jicong 已提交
269 270 271 272
    return -1;
  }
  ASSERT(strcmp(pReq->cgroup, pConsumer->cgroup) == 0);

L
Liu Jicong 已提交
273
  // TODO
L
Liu Jicong 已提交
274
  int32_t hbStatus = atomic_load_32(&pConsumer->hbStatus);
275
  mTrace("try to get sub ep, old val: %d", hbStatus);
L
Liu Jicong 已提交
276 277 278 279 280
  atomic_store_32(&pConsumer->hbStatus, 0);
  /*SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pConsumer);*/
  /*sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);*/
  /*sdbWrite(pMnode->pSdb, pConsumerRaw);*/

L
Liu Jicong 已提交
281 282
  strcpy(rsp.cgroup, pReq->cgroup);
  rsp.consumerId = consumerId;
L
Liu Jicong 已提交
283 284
  if (epoch != pConsumer->epoch) {
    mInfo("send new assignment to consumer, consumer epoch %d, server epoch %d", epoch, pConsumer->epoch);
L
Liu Jicong 已提交
285
    SArray *pTopics = pConsumer->currentTopics;
S
Shengliang Guan 已提交
286
    int32_t sz = taosArrayGetSize(pTopics);
L
Liu Jicong 已提交
287
    rsp.topics = taosArrayInit(sz, sizeof(SMqSubTopicEp));
S
Shengliang Guan 已提交
288
    for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
289 290 291
      char            *topicName = taosArrayGetP(pTopics, i);
      SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, topicName);
      ASSERT(pSub);
S
Shengliang Guan 已提交
292
      int32_t csz = taosArrayGetSize(pSub->consumers);
L
Liu Jicong 已提交
293
      // TODO: change to bsearch
S
Shengliang Guan 已提交
294
      for (int32_t j = 0; j < csz; j++) {
L
Liu Jicong 已提交
295 296
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, j);
        if (consumerId == pSubConsumer->consumerId) {
S
Shengliang Guan 已提交
297
          int32_t       vgsz = taosArrayGetSize(pSubConsumer->vgInfo);
L
Liu Jicong 已提交
298
          SMqSubTopicEp topicEp;
L
Liu Jicong 已提交
299
          strcpy(topicEp.topic, topicName);
L
Liu Jicong 已提交
300
          topicEp.vgs = taosArrayInit(vgsz, sizeof(SMqSubVgEp));
S
Shengliang Guan 已提交
301
          for (int32_t k = 0; k < vgsz; k++) {
L
Liu Jicong 已提交
302
            char           offsetKey[TSDB_PARTITION_KEY_LEN];
L
Liu Jicong 已提交
303
            SMqConsumerEp *pConsumerEp = taosArrayGet(pSubConsumer->vgInfo, k);
L
Liu Jicong 已提交
304 305 306 307 308 309 310
            SMqSubVgEp     vgEp = {.epSet = pConsumerEp->epSet, .vgId = pConsumerEp->vgId, .offset = -1};
            mndMakePartitionKey(offsetKey, pConsumer->cgroup, topicName, pConsumerEp->vgId);
            SMqOffsetObj *pOffsetObj = mndAcquireOffset(pMnode, offsetKey);
            if (pOffsetObj != NULL) {
              vgEp.offset = pOffsetObj->offset;
              mndReleaseOffset(pMnode, pOffsetObj);
            }
L
Liu Jicong 已提交
311 312 313 314 315
            taosArrayPush(topicEp.vgs, &vgEp);
          }
          taosArrayPush(rsp.topics, &topicEp);
          break;
        }
L
Liu Jicong 已提交
316
      }
L
Liu Jicong 已提交
317
      mndReleaseSubscribe(pMnode, pSub);
L
Liu Jicong 已提交
318 319
    }
  }
L
Liu Jicong 已提交
320
  int32_t tlen = sizeof(SMqRspHead) + tEncodeSMqCMGetSubEpRsp(NULL, &rsp);
L
Liu Jicong 已提交
321
  void   *buf = rpcMallocCont(tlen);
L
Liu Jicong 已提交
322 323 324 325
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
L
Liu Jicong 已提交
326 327 328 329
  ((SMqRspHead *)buf)->mqMsgType = TMQ_MSG_TYPE__EP_RSP;
  ((SMqRspHead *)buf)->epoch = pConsumer->epoch;

  void *abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));
L
Liu Jicong 已提交
330
  tEncodeSMqCMGetSubEpRsp(&abuf, &rsp);
L
Liu Jicong 已提交
331
  tDeleteSMqCMGetSubEpRsp(&rsp);
L
Liu Jicong 已提交
332
  mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
333 334 335 336 337
  pMsg->pCont = buf;
  pMsg->contLen = tlen;
  return 0;
}

L
Liu Jicong 已提交
338
static int32_t mndSplitSubscribeKey(char *key, char **topic, char **cgroup) {
S
Shengliang Guan 已提交
339
  int32_t i = 0;
L
Liu Jicong 已提交
340 341 342 343
  while (key[i] != ':') {
    i++;
  }
  key[i] = 0;
L
Liu Jicong 已提交
344
  *cgroup = strdup(key);
L
Liu Jicong 已提交
345
  key[i] = ':';
L
Liu Jicong 已提交
346
  *topic = strdup(&key[i + 1]);
L
Liu Jicong 已提交
347 348 349
  return 0;
}

L
Liu Jicong 已提交
350 351 352 353 354 355 356 357 358 359 360 361 362
static SMqRebSubscribe *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) {
  SMqRebSubscribe *pRebSub = taosHashGet(pHash, key, strlen(key));
  if (pRebSub == NULL) {
    pRebSub = tNewSMqRebSubscribe(key);
    if (pRebSub == NULL) {
      // TODO
      return NULL;
    }
    taosHashPut(pHash, key, strlen(key), pRebSub, sizeof(SMqRebSubscribe));
  }
  return pRebSub;
}

L
Liu Jicong 已提交
363
static int32_t mndProcessMqTimerMsg(SMnodeMsg *pMsg) {
L
Liu Jicong 已提交
364 365 366 367 368 369 370
  SMnode            *pMnode = pMsg->pMnode;
  SSdb              *pSdb = pMnode->pSdb;
  SMqConsumerObj    *pConsumer;
  void              *pIter = NULL;
  SMqDoRebalanceMsg *pRebMsg = rpcMallocCont(sizeof(SMqDoRebalanceMsg));
  pRebMsg->rebSubHash = taosHashInit(64, MurmurHash3_32, true, HASH_NO_LOCK);

L
Liu Jicong 已提交
371 372 373
  while (1) {
    pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer);
    if (pIter == NULL) break;
374
    int32_t hbStatus = atomic_add_fetch_32(&pConsumer->hbStatus, 1);
L
Liu Jicong 已提交
375 376 377 378
    if (hbStatus > MND_SUBSCRIBE_REBALANCE_CNT) {
      int32_t old =
          atomic_val_compare_exchange_32(&pConsumer->status, MQ_CONSUMER_STATUS__ACTIVE, MQ_CONSUMER_STATUS__LOST);
      if (old == MQ_CONSUMER_STATUS__ACTIVE) {
L
Liu Jicong 已提交
379
        // get all topics of that topic
S
Shengliang Guan 已提交
380 381
        int32_t sz = taosArrayGetSize(pConsumer->currentTopics);
        for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
          char            *topic = taosArrayGetP(pConsumer->currentTopics, i);
          char            *key = mndMakeSubscribeKey(pConsumer->cgroup, topic);
          SMqRebSubscribe *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
          taosArrayPush(pRebSub->lostConsumers, &pConsumer->consumerId);
        }
      }
    }
    int32_t status = atomic_load_32(&pConsumer->status);
    if (status == MQ_CONSUMER_STATUS__INIT || status == MQ_CONSUMER_STATUS__MODIFY) {
      SArray *rebSubs;
      if (status == MQ_CONSUMER_STATUS__INIT) {
        rebSubs = pConsumer->currentTopics;
      } else {
        rebSubs = pConsumer->recentRemovedTopics;
      }
S
Shengliang Guan 已提交
397 398
      int32_t sz = taosArrayGetSize(rebSubs);
      for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
399 400 401 402 403 404 405 406
        char            *topic = taosArrayGetP(rebSubs, i);
        char            *key = mndMakeSubscribeKey(pConsumer->cgroup, topic);
        SMqRebSubscribe *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
        if (status == MQ_CONSUMER_STATUS__INIT) {
          taosArrayPush(pRebSub->newConsumers, &pConsumer->consumerId);
        } else if (status == MQ_CONSUMER_STATUS__MODIFY) {
          taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
        }
L
Liu Jicong 已提交
407
      }
L
Liu Jicong 已提交
408 409 410 411 412 413 414 415
      if (status == MQ_CONSUMER_STATUS__MODIFY) {
        int32_t removeSz = taosArrayGetSize(pConsumer->recentRemovedTopics);
        for (int32_t i = 0; i < removeSz; i++) {
          char *topicName = taosArrayGet(pConsumer->recentRemovedTopics, i);
          free(topicName);
        }
        taosArrayClear(pConsumer->recentRemovedTopics);
      }
L
Liu Jicong 已提交
416 417
    }
  }
L
Liu Jicong 已提交
418 419 420 421 422 423 424 425
  if (taosHashGetSize(pRebMsg->rebSubHash) != 0) {
    mInfo("mq rebalance will be triggered");
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_MQ_DO_REBALANCE, .pCont = pRebMsg, .contLen = sizeof(SMqDoRebalanceMsg)};
    pMnode->putReqToMWriteQFp(pMnode->pDnode, &rpcMsg);
  } else {
    taosHashCleanup(pRebMsg->rebSubHash);
    rpcFreeCont(pRebMsg);
  }
L
Liu Jicong 已提交
426 427 428 429 430
  return 0;
}

static int32_t mndProcessDoRebalanceMsg(SMnodeMsg *pMsg) {
  SMnode            *pMnode = pMsg->pMnode;
S
Shengliang Guan 已提交
431 432
  SMqDoRebalanceMsg *pReq = pMsg->rpcMsg.pCont;
  STrans            *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_TYPE_REBALANCE, &pMsg->rpcMsg);
L
Liu Jicong 已提交
433 434 435 436 437 438 439 440 441 442 443 444 445
  void              *pIter = NULL;

  mInfo("mq rebalance start");

  while (1) {
    pIter = taosHashIterate(pReq->rebSubHash, pIter);
    if (pIter == NULL) break;
    SMqRebSubscribe *pRebSub = (SMqRebSubscribe *)pIter;
    SMqSubscribeObj *pSub = mndAcquireSubscribeByKey(pMnode, pRebSub->key);

    mInfo("mq rebalance subscription: %s", pSub->key);

    // remove lost consumer
S
Shengliang Guan 已提交
446
    for (int32_t i = 0; i < taosArrayGetSize(pRebSub->lostConsumers); i++) {
L
Liu Jicong 已提交
447 448 449 450
      int64_t lostConsumerId = *(int64_t *)taosArrayGet(pRebSub->lostConsumers, i);

      mInfo("mq remove lost consumer %ld", lostConsumerId);

S
Shengliang Guan 已提交
451
      for (int32_t j = 0; j < taosArrayGetSize(pSub->consumers); j++) {
L
Liu Jicong 已提交
452 453 454 455
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, j);
        if (pSubConsumer->consumerId == lostConsumerId) {
          taosArrayAddAll(pSub->unassignedVg, pSubConsumer->vgInfo);
          taosArrayPush(pSub->lostConsumers, pSubConsumer);
L
Liu Jicong 已提交
456 457 458 459 460 461 462 463
          taosArrayRemove(pSub->consumers, j);
          break;
        }
      }
    }

    // calculate rebalance
    int32_t consumerNum = taosArrayGetSize(pSub->consumers);
L
Liu Jicong 已提交
464 465 466
    if (consumerNum != 0) {
      int32_t vgNum = pSub->vgNum;
      int32_t vgEachConsumer = vgNum / consumerNum;
L
Liu Jicong 已提交
467 468 469 470
      int32_t imbalanceVg = vgNum % consumerNum;
      int32_t imbalanceSolved = 0;

      // iterate all consumers, set unassignedVgStash
S
Shengliang Guan 已提交
471
      for (int32_t i = 0; i < consumerNum; i++) {
L
Liu Jicong 已提交
472
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, i);
S
Shengliang Guan 已提交
473 474
        int32_t         vgThisConsumerBeforeRb = taosArrayGetSize(pSubConsumer->vgInfo);
        int32_t         vgThisConsumerAfterRb;
L
Liu Jicong 已提交
475 476 477 478
        if (i < imbalanceVg)
          vgThisConsumerAfterRb = vgEachConsumer + 1;
        else
          vgThisConsumerAfterRb = vgEachConsumer;
L
Liu Jicong 已提交
479

L
Liu Jicong 已提交
480 481
        mInfo("mq consumer:%ld, connectted vgroup number change from %d to %d", pSubConsumer->consumerId,
              vgThisConsumerBeforeRb, vgThisConsumerAfterRb);
L
Liu Jicong 已提交
482

L
Liu Jicong 已提交
483
        while (taosArrayGetSize(pSubConsumer->vgInfo) > vgThisConsumerAfterRb) {
L
Liu Jicong 已提交
484 485 486
          SMqConsumerEp *pConsumerEp = taosArrayPop(pSubConsumer->vgInfo);
          ASSERT(pConsumerEp != NULL);
          ASSERT(pConsumerEp->consumerId == pSubConsumer->consumerId);
L
Liu Jicong 已提交
487
          taosArrayPush(pSub->unassignedVg, pConsumerEp);
L
Liu Jicong 已提交
488 489
        }

L
Liu Jicong 已提交
490 491 492 493 494
        SMqConsumerObj *pRebConsumer = mndAcquireConsumer(pMnode, pSubConsumer->consumerId);
        int32_t         status = atomic_load_32(&pRebConsumer->status);
        if (vgThisConsumerAfterRb != vgThisConsumerBeforeRb ||
            (vgThisConsumerAfterRb != 0 && status != MQ_CONSUMER_STATUS__ACTIVE) ||
            (vgThisConsumerAfterRb == 0 && status != MQ_CONSUMER_STATUS__LOST)) {
L
Liu Jicong 已提交
495 496 497
          if (vgThisConsumerAfterRb != vgThisConsumerBeforeRb) {
            pRebConsumer->epoch++;
          }
L
Liu Jicong 已提交
498 499 500 501 502
          if (vgThisConsumerAfterRb != 0) {
            atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__ACTIVE);
          } else {
            atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__IDLE);
          }
L
Liu Jicong 已提交
503

L
Liu Jicong 已提交
504
          mInfo("mq consumer:%ld, status change from %d to %d", pRebConsumer->consumerId, status, pRebConsumer->status);
L
Liu Jicong 已提交
505

L
Liu Jicong 已提交
506 507 508 509 510
          SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pRebConsumer);
          sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
          mndTransAppendRedolog(pTrans, pConsumerRaw);
        }
        mndReleaseConsumer(pMnode, pRebConsumer);
L
Liu Jicong 已提交
511 512
      }

L
Liu Jicong 已提交
513
      // assign to vgroup
L
Liu Jicong 已提交
514
      if (taosArrayGetSize(pSub->unassignedVg) != 0) {
S
Shengliang Guan 已提交
515
        for (int32_t i = 0; i < consumerNum; i++) {
L
Liu Jicong 已提交
516
          SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, i);
S
Shengliang Guan 已提交
517 518
          int32_t         vgThisConsumerBeforeRb = taosArrayGetSize(pSubConsumer->vgInfo);
          int32_t         vgThisConsumerAfterRb;
L
Liu Jicong 已提交
519 520 521 522
          if (i < imbalanceVg)
            vgThisConsumerAfterRb = vgEachConsumer + 1;
          else
            vgThisConsumerAfterRb = vgEachConsumer;
L
Liu Jicong 已提交
523

L
Liu Jicong 已提交
524 525
          while (taosArrayGetSize(pSubConsumer->vgInfo) < vgThisConsumerAfterRb) {
            SMqConsumerEp *pConsumerEp = taosArrayPop(pSub->unassignedVg);
L
Liu Jicong 已提交
526 527 528 529
            ASSERT(pConsumerEp != NULL);

            pConsumerEp->oldConsumerId = pConsumerEp->consumerId;
            pConsumerEp->consumerId = pSubConsumer->consumerId;
L
Liu Jicong 已提交
530
            taosArrayPush(pSubConsumer->vgInfo, pConsumerEp);
L
Liu Jicong 已提交
531

532
            if (pConsumerEp->oldConsumerId == -1) {
L
Liu Jicong 已提交
533 534
              char *topic;
              char *cgroup;
535
              mndSplitSubscribeKey(pSub->key, &topic, &cgroup);
L
Liu Jicong 已提交
536
              SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic);
537

L
Liu Jicong 已提交
538 539
              mInfo("mq set conn: assign vgroup %d of topic %s to consumer %ld", pConsumerEp->vgId, topic,
                    pConsumerEp->consumerId);
540 541 542 543 544 545

              mndPersistMqSetConnReq(pMnode, pTrans, pTopic, cgroup, pConsumerEp);
              mndReleaseTopic(pMnode, pTopic);
              free(topic);
              free(cgroup);
            } else {
L
Liu Jicong 已提交
546 547
              mInfo("mq rebalance: assign vgroup %d, from consumer %ld to consumer %ld", pConsumerEp->vgId,
                    pConsumerEp->oldConsumerId, pConsumerEp->consumerId);
L
Liu Jicong 已提交
548

549 550
              mndPersistRebalanceMsg(pMnode, pTrans, pConsumerEp);
            }
L
Liu Jicong 已提交
551 552 553
          }
        }
      }
L
Liu Jicong 已提交
554
      ASSERT(taosArrayGetSize(pSub->unassignedVg) == 0);
L
Liu Jicong 已提交
555 556 557 558 559 560 561 562 563 564

      // TODO: log rebalance statistics
      SSdbRaw *pSubRaw = mndSubActionEncode(pSub);
      sdbSetRawStatus(pSubRaw, SDB_STATUS_READY);
      mndTransAppendRedolog(pTrans, pSubRaw);
    }
    mndReleaseSubscribe(pMnode, pSub);
  }
  if (mndTransPrepare(pMnode, pTrans) != 0) {
    mError("mq-rebalance-trans:%d, failed to prepare since %s", pTrans->id, terrstr());
L
Liu Jicong 已提交
565
    taosHashCleanup(pReq->rebSubHash);
L
Liu Jicong 已提交
566 567 568 569
    mndTransDrop(pTrans);
    return -1;
  }

L
Liu Jicong 已提交
570
  taosHashCleanup(pReq->rebSubHash);
L
Liu Jicong 已提交
571 572 573 574 575
  mndTransDrop(pTrans);
  return 0;
}

#if 0
L
Liu Jicong 已提交
576 577
      for (int32_t j = 0; j < consumerNum; j++) {
        bool            changed = false;
L
Liu Jicong 已提交
578 579 580 581 582 583 584 585 586
        bool            unfished = false;

        bool            canUseLeft = imbalanceSolved < imbalanceVg;
        bool            mustUseLeft = canUseLeft && (imbalanceVg - imbalanceSolved >= consumerNum - j);
        ASSERT(imbalanceVg - imbalanceSolved <= consumerNum - j);

        int32_t maxVg = vgEachConsumer + canUseLeft;
        int32_t minVg = vgEachConsumer + mustUseLeft;

L
Liu Jicong 已提交
587
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, j);
L
Liu Jicong 已提交
588 589 590 591 592 593 594 595 596
        int32_t         vgThisConsumerBeforeRb = taosArrayGetSize(pSubConsumer->vgInfo);
        int32_t         vgThisConsumerAfterRb;
        if (vgThisConsumerBeforeRb > maxVg) {
          vgThisConsumerAfterRb = maxVg; 
          imbalanceSolved++;
          changed = true;
        } else if (vgThisConsumerBeforeRb < minVg) {
          vgThisConsumerAfterRb = minVg;
          if (mustUseLeft) imbalanceSolved++;
L
Liu Jicong 已提交
597
          changed = true;
L
Liu Jicong 已提交
598 599 600 601 602 603 604
        } else {
          vgThisConsumerAfterRb = vgThisConsumerBeforeRb;
        }

        if (vgThisConsumerBeforeRb > vgThisConsumerAfterRb) {
          while (taosArrayGetSize(pSubConsumer->vgInfo) > vgThisConsumerAfterRb) {
            // put into unassigned
L
Liu Jicong 已提交
605 606
            SMqConsumerEp *pConsumerEp = taosArrayPop(pSubConsumer->vgInfo);
            ASSERT(pConsumerEp != NULL);
L
Liu Jicong 已提交
607
            ASSERT(pConsumerEp->consumerId == pSubConsumer->consumerId);
L
Liu Jicong 已提交
608 609
            taosArrayPush(unassignedVgStash, pConsumerEp);
          }
L
Liu Jicong 已提交
610 611

        } else if (vgThisConsumerBeforeRb < vgThisConsumerAfterRb) {
L
Liu Jicong 已提交
612
          // assign from unassigned
L
Liu Jicong 已提交
613
          while (taosArrayGetSize(pSubConsumer->vgInfo) < vgThisConsumerAfterRb) {
L
Liu Jicong 已提交
614 615
            // if no unassgined, save j
            if (taosArrayGetSize(unassignedVgStash) == 0) {
L
Liu Jicong 已提交
616 617
              taosArrayPush(unassignedConsumerIdx, &j);
              unfished = true;
L
Liu Jicong 已提交
618 619
              break;
            }
L
Liu Jicong 已提交
620
            // assign vg to consumer
L
Liu Jicong 已提交
621 622 623 624 625 626 627 628
            SMqConsumerEp *pConsumerEp = taosArrayPop(unassignedVgStash);
            ASSERT(pConsumerEp != NULL);
            pConsumerEp->oldConsumerId = pConsumerEp->consumerId;
            pConsumerEp->consumerId = pSubConsumer->consumerId;
            taosArrayPush(pSubConsumer->vgInfo, pConsumerEp);
            // build msg and persist into trans
          }
        }
L
Liu Jicong 已提交
629 630

        if (changed && !unfished) {
L
Liu Jicong 已提交
631 632
          SMqConsumerObj *pRebConsumer = mndAcquireConsumer(pMnode, pSubConsumer->consumerId);
          pRebConsumer->epoch++;
L
Liu Jicong 已提交
633 634 635 636 637 638
          if (vgThisConsumerAfterRb != 0) {
            atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__ACTIVE);
          } else {
            atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__IDLE);
          }
          SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pRebConsumer);
L
Liu Jicong 已提交
639
          sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
L
Liu Jicong 已提交
640
          mndTransAppendRedolog(pTrans, pConsumerRaw);
L
Liu Jicong 已提交
641 642
          mndReleaseConsumer(pMnode, pRebConsumer);
          // TODO: save history
L
Liu Jicong 已提交
643 644 645
        }
      }

L
Liu Jicong 已提交
646 647 648
      for (int32_t j = 0; j < taosArrayGetSize(unassignedConsumerIdx); j++) {
        bool            canUseLeft = imbalanceSolved < imbalanceVg;
        int32_t         consumerIdx = *(int32_t *)taosArrayGet(unassignedConsumerIdx, j);
L
Liu Jicong 已提交
649
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, consumerIdx);
L
Liu Jicong 已提交
650 651 652 653 654
        if (canUseLeft) imbalanceSolved++;
        // must use
        int32_t vgThisConsumerAfterRb = taosArrayGetSize(pSubConsumer->vgInfo) + canUseLeft;
        while (taosArrayGetSize(pSubConsumer->vgInfo) < vgEachConsumer + canUseLeft) {
          // assign vg to consumer
L
Liu Jicong 已提交
655 656 657 658 659 660 661
          SMqConsumerEp *pConsumerEp = taosArrayPop(unassignedVgStash);
          ASSERT(pConsumerEp != NULL);
          pConsumerEp->oldConsumerId = pConsumerEp->consumerId;
          pConsumerEp->consumerId = pSubConsumer->consumerId;
          taosArrayPush(pSubConsumer->vgInfo, pConsumerEp);
          // build msg and persist into trans
        }
L
Liu Jicong 已提交
662 663 664 665 666 667 668 669
        SMqConsumerObj *pRebConsumer = mndAcquireConsumer(pMnode, pSubConsumer->consumerId);
        pRebConsumer->epoch++;
        atomic_store_32(&pRebConsumer->status, MQ_CONSUMER_STATUS__ACTIVE);
        SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pRebConsumer);
        sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
        mndTransAppendRedolog(pTrans, pConsumerRaw);
        mndReleaseConsumer(pMnode, pRebConsumer);
        // TODO: save history
L
Liu Jicong 已提交
670
      }
L
Liu Jicong 已提交
671
#endif
L
Liu Jicong 已提交
672 673 674

#if 0
    //update consumer status for the subscribption
S
Shengliang Guan 已提交
675
    for (int32_t i = 0; i < taosArrayGetSize(pSub->assigned); i++) {
L
Liu Jicong 已提交
676 677
      SMqConsumerEp *pCEp = taosArrayGet(pSub->assigned, i);
      int64_t        consumerId = pCEp->consumerId;
L
Liu Jicong 已提交
678 679 680 681 682
      if (pCEp->status != -1) {
        int32_t consumerHbStatus = atomic_fetch_add_32(&pCEp->consumerHbStatus, 1);
        if (consumerHbStatus < MND_SUBSCRIBE_REBALANCE_CNT) {
          continue;
        }
L
Liu Jicong 已提交
683
        // put consumer into lostConsumer
L
Liu Jicong 已提交
684 685
        SMqConsumerEp* lostConsumer = taosArrayPush(pSub->lostConsumer, pCEp);
        lostConsumer->qmsg = NULL;
L
Liu Jicong 已提交
686
        // put vg into unassigned
L
Liu Jicong 已提交
687 688 689 690 691
        taosArrayPush(pSub->unassignedVg, pCEp);
        // remove from assigned
        // TODO: swap with last one, reduce size and reset i
        taosArrayRemove(pSub->assigned, i);
        // remove from available consumer
S
Shengliang Guan 已提交
692
        for (int32_t j = 0; j < taosArrayGetSize(pSub->availConsumer); j++) {
L
Liu Jicong 已提交
693
          if (*(int64_t *)taosArrayGet(pSub->availConsumer, i) == pCEp->consumerId) {
L
Liu Jicong 已提交
694 695 696 697 698
            taosArrayRemove(pSub->availConsumer, j);
            break;
          }
          // TODO: acquire consumer, set status to unavail
        }
L
Liu Jicong 已提交
699
#if 0
L
Liu Jicong 已提交
700 701
        SMqConsumerObj* pConsumer = mndAcquireConsumer(pMnode, consumerId);
        pConsumer->epoch++;
L
Liu Jicong 已提交
702
        printf("current epoch %ld size %ld", pConsumer->epoch, pConsumer->topics->size);
L
Liu Jicong 已提交
703 704 705 706
        SSdbRaw* pRaw = mndConsumerActionEncode(pConsumer);
        sdbSetRawStatus(pRaw, SDB_STATUS_READY);
        sdbWriteNotFree(pMnode->pSdb, pRaw);
        mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
707
#endif
L
Liu Jicong 已提交
708 709
      }
    }
L
Liu Jicong 已提交
710 711 712 713 714 715 716 717
    // no available consumer, skip rebalance
    if (taosArrayGetSize(pSub->availConsumer) == 0) {
      continue;
    }
    taosArrayGet(pSub->availConsumer, 0);
    // rebalance condition1 : have unassigned vg
    // assign vg to a consumer, trying to find the least assigned one
    if ((sz = taosArrayGetSize(pSub->unassignedVg)) > 0) {
L
Liu Jicong 已提交
718 719 720 721 722 723
      char *topic = NULL;
      char *cgroup = NULL;
      mndSplitSubscribeKey(pSub->key, &topic, &cgroup);

      SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic);
      STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pMsg->rpcMsg);
L
Liu Jicong 已提交
724
      for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
725
        int64_t        consumerId = *(int64_t *)taosArrayGet(pSub->availConsumer, pSub->nextConsumerIdx);
L
Liu Jicong 已提交
726 727
        pSub->nextConsumerIdx = (pSub->nextConsumerIdx + 1) % taosArrayGetSize(pSub->availConsumer);

L
Liu Jicong 已提交
728
        SMqConsumerEp *pCEp = taosArrayPop(pSub->unassignedVg);
L
Liu Jicong 已提交
729
        pCEp->oldConsumerId = pCEp->consumerId;
L
Liu Jicong 已提交
730 731 732
        pCEp->consumerId = consumerId;
        taosArrayPush(pSub->assigned, pCEp);

L
Liu Jicong 已提交
733
        SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, consumerId);
L
Liu Jicong 已提交
734
        pConsumer->epoch++;
L
Liu Jicong 已提交
735 736 737
        SSdbRaw* pConsumerRaw = mndConsumerActionEncode(pConsumer);
        sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
        sdbWrite(pMnode->pSdb, pConsumerRaw);
L
Liu Jicong 已提交
738 739
        mndReleaseConsumer(pMnode, pConsumer);

L
Liu Jicong 已提交
740 741 742
        void* msg;
        int32_t msgLen;
        mndBuildRebalanceMsg(&msg, &msgLen, pTopic, pCEp, cgroup, topic);
L
Liu Jicong 已提交
743 744 745

        // persist msg
        STransAction action = {0};
746
        action.epSet = pCEp->epSet;
L
Liu Jicong 已提交
747 748
        action.pCont = msg;
        action.contLen = sizeof(SMsgHead) + msgLen;
L
Liu Jicong 已提交
749 750 751
        action.msgType = TDMT_VND_MQ_SET_CONN;
        mndTransAppendRedoAction(pTrans, &action);

L
Liu Jicong 已提交
752
        // persist data
L
Liu Jicong 已提交
753
        SSdbRaw *pRaw = mndSubActionEncode(pSub);
L
Liu Jicong 已提交
754
        sdbSetRawStatus(pRaw, SDB_STATUS_READY);
L
Liu Jicong 已提交
755 756
        mndTransAppendRedolog(pTrans, pRaw);
      }
L
Liu Jicong 已提交
757

L
Liu Jicong 已提交
758 759 760
      if (mndTransPrepare(pMnode, pTrans) != 0) {
        mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr());
      }
L
Liu Jicong 已提交
761
      mndReleaseTopic(pMnode, pTopic);
L
Liu Jicong 已提交
762
      mndTransDrop(pTrans);
L
Liu Jicong 已提交
763 764
      tfree(topic);
      tfree(cgroup);
L
Liu Jicong 已提交
765
    }
L
Liu Jicong 已提交
766
    // rebalance condition2 : imbalance assignment
L
Liu Jicong 已提交
767 768 769
  }
  return 0;
}
L
Liu Jicong 已提交
770
#endif
L
Liu Jicong 已提交
771

S
Shengliang Guan 已提交
772
static int32_t mndInitUnassignedVg(SMnode *pMnode, const SMqTopicObj *pTopic, SMqSubscribeObj *pSub) {
L
Liu Jicong 已提交
773 774
  SSdb      *pSdb = pMnode->pSdb;
  SVgObj    *pVgroup = NULL;
L
Liu Jicong 已提交
775
  SQueryDag *pDag = qStringToDag(pTopic->physicalPlan);
L
Liu Jicong 已提交
776
  SArray    *pArray = NULL;
L
Liu Jicong 已提交
777 778
  SArray    *inner = taosArrayGet(pDag->pSubplans, 0);
  SSubplan  *plan = taosArrayGetP(inner, 0);
L
Liu Jicong 已提交
779
  SArray    *unassignedVg = pSub->unassignedVg;
L
Liu Jicong 已提交
780

L
Liu Jicong 已提交
781 782 783 784
  void *pIter = NULL;
  while (1) {
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;
S
Shengliang Guan 已提交
785 786 787 788
    if (pVgroup->dbUid != pTopic->dbUid) {
      sdbRelease(pSdb, pVgroup);
      continue;
    }
L
Liu Jicong 已提交
789

L
Liu Jicong 已提交
790
    pSub->vgNum++;
L
Liu Jicong 已提交
791 792 793 794 795 796 797 798
    plan->execNode.nodeId = pVgroup->vgId;
    plan->execNode.epset = mndGetVgroupEpset(pMnode, pVgroup);

    if (schedulerConvertDagToTaskList(pDag, &pArray) < 0) {
      terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
      mError("unsupport topic: %s, sql: %s", pTopic->name, pTopic->sql);
      return -1;
    }
L
Liu Jicong 已提交
799

L
Liu Jicong 已提交
800 801 802
    SMqConsumerEp consumerEp = {0};
    consumerEp.status = 0;
    consumerEp.consumerId = -1;
L
Liu Jicong 已提交
803
    STaskInfo *pTaskInfo = taosArrayGet(pArray, 0);
L
Liu Jicong 已提交
804 805
    consumerEp.epSet = pTaskInfo->addr.epset;
    consumerEp.vgId = pTaskInfo->addr.nodeId;
L
Liu Jicong 已提交
806

L
Liu Jicong 已提交
807 808 809
    ASSERT(consumerEp.vgId == pVgroup->vgId);
    consumerEp.qmsg = strdup(pTaskInfo->msg->msg);
    taosArrayPush(unassignedVg, &consumerEp);
L
Liu Jicong 已提交
810
    // TODO: free taskInfo
L
Liu Jicong 已提交
811
    taosArrayDestroy(pArray);
L
Liu Jicong 已提交
812
  }
813

L
Liu Jicong 已提交
814
  /*qDestroyQueryDag(pDag);*/
815
  return 0;
L
Liu Jicong 已提交
816 817
}

S
Shengliang Guan 已提交
818 819
static int32_t mndPersistMqSetConnReq(SMnode *pMnode, STrans *pTrans, const SMqTopicObj *pTopic, const char *cgroup,
                                      const SMqConsumerEp *pConsumerEp) {
820
  ASSERT(pConsumerEp->oldConsumerId == -1);
L
Liu Jicong 已提交
821 822 823 824
  int32_t vgId = pConsumerEp->vgId;

  SMqSetCVgReq req = {
      .vgId = vgId,
L
Liu Jicong 已提交
825
      .consumerId = pConsumerEp->consumerId,
L
Liu Jicong 已提交
826 827 828 829 830 831 832 833 834 835 836 837 838 839
      .sql = pTopic->sql,
      .logicalPlan = pTopic->logicalPlan,
      .physicalPlan = pTopic->physicalPlan,
      .qmsg = pConsumerEp->qmsg,
  };

  strcpy(req.cgroup, cgroup);
  strcpy(req.topicName, pTopic->name);
  int32_t tlen = tEncodeSMqSetCVgReq(NULL, &req);
  void   *buf = malloc(sizeof(SMsgHead) + tlen);
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
L
Liu Jicong 已提交
840

L
Liu Jicong 已提交
841
  SMsgHead *pMsgHead = (SMsgHead *)buf;
L
Liu Jicong 已提交
842

L
Liu Jicong 已提交
843 844
  pMsgHead->contLen = htonl(sizeof(SMsgHead) + tlen);
  pMsgHead->vgId = htonl(vgId);
L
Liu Jicong 已提交
845

L
Liu Jicong 已提交
846 847
  void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tEncodeSMqSetCVgReq(&abuf, &req);
L
Liu Jicong 已提交
848

S
Shengliang Guan 已提交
849 850
  SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId);

L
Liu Jicong 已提交
851 852 853 854 855
  STransAction action = {0};
  action.epSet = mndGetVgroupEpset(pMnode, pVgObj);
  action.pCont = buf;
  action.contLen = sizeof(SMsgHead) + tlen;
  action.msgType = TDMT_VND_MQ_SET_CONN;
L
Liu Jicong 已提交
856

L
Liu Jicong 已提交
857 858 859 860
  mndReleaseVgroup(pMnode, pVgObj);
  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    free(buf);
    return -1;
L
Liu Jicong 已提交
861 862 863 864 865 866 867
  }
  return 0;
}

void mndCleanupSubscribe(SMnode *pMnode) {}

static SSdbRaw *mndSubActionEncode(SMqSubscribeObj *pSub) {
L
Liu Jicong 已提交
868
  terrno = TSDB_CODE_OUT_OF_MEMORY;
L
Liu Jicong 已提交
869
  void   *buf = NULL;
L
Liu Jicong 已提交
870
  int32_t tlen = tEncodeSubscribeObj(NULL, pSub);
L
Liu Jicong 已提交
871
  int32_t size = sizeof(int32_t) + tlen + MND_SUBSCRIBE_RESERVE_SIZE;
L
Liu Jicong 已提交
872 873 874 875

  SSdbRaw *pRaw = sdbAllocRaw(SDB_SUBSCRIBE, MND_SUBSCRIBE_VER_NUMBER, size);
  if (pRaw == NULL) goto SUB_ENCODE_OVER;

L
Liu Jicong 已提交
876
  buf = malloc(tlen);
L
Liu Jicong 已提交
877
  if (buf == NULL) goto SUB_ENCODE_OVER;
L
Liu Jicong 已提交
878

L
Liu Jicong 已提交
879 880
  void *abuf = buf;
  tEncodeSubscribeObj(&abuf, pSub);
L
Liu Jicong 已提交
881 882 883 884 885 886 887

  int32_t dataPos = 0;
  SDB_SET_INT32(pRaw, dataPos, tlen, SUB_ENCODE_OVER);
  SDB_SET_BINARY(pRaw, dataPos, buf, tlen, SUB_ENCODE_OVER);
  SDB_SET_RESERVE(pRaw, dataPos, MND_SUBSCRIBE_RESERVE_SIZE, SUB_ENCODE_OVER);
  SDB_SET_DATALEN(pRaw, dataPos, SUB_ENCODE_OVER);

L
Liu Jicong 已提交
888 889
  terrno = TSDB_CODE_SUCCESS;

L
Liu Jicong 已提交
890
SUB_ENCODE_OVER:
L
Liu Jicong 已提交
891
  tfree(buf);
L
Liu Jicong 已提交
892
  if (terrno != TSDB_CODE_SUCCESS) {
L
Liu Jicong 已提交
893 894 895 896 897 898 899 900 901 902 903
    mError("subscribe:%s, failed to encode to raw:%p since %s", pSub->key, pRaw, terrstr());
    sdbFreeRaw(pRaw);
    return NULL;
  }

  mTrace("subscribe:%s, encode to raw:%p, row:%p", pSub->key, pRaw, pSub);
  return pRaw;
}

static SSdbRow *mndSubActionDecode(SSdbRaw *pRaw) {
  terrno = TSDB_CODE_OUT_OF_MEMORY;
L
Liu Jicong 已提交
904
  void *buf = NULL;
L
Liu Jicong 已提交
905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922

  int8_t sver = 0;
  if (sdbGetRawSoftVer(pRaw, &sver) != 0) goto SUB_DECODE_OVER;

  if (sver != MND_SUBSCRIBE_VER_NUMBER) {
    terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
    goto SUB_DECODE_OVER;
  }

  int32_t  size = sizeof(SMqSubscribeObj);
  SSdbRow *pRow = sdbAllocRow(size);
  if (pRow == NULL) goto SUB_DECODE_OVER;

  SMqSubscribeObj *pSub = sdbGetRowObj(pRow);
  if (pSub == NULL) goto SUB_DECODE_OVER;

  int32_t dataPos = 0;
  int32_t tlen;
L
Liu Jicong 已提交
923
  SDB_GET_INT32(pRaw, dataPos, &tlen, SUB_DECODE_OVER);
L
Liu Jicong 已提交
924
  buf = malloc(tlen + 1);
L
Liu Jicong 已提交
925 926 927 928 929 930 931 932
  if (buf == NULL) goto SUB_DECODE_OVER;
  SDB_GET_BINARY(pRaw, dataPos, buf, tlen, SUB_DECODE_OVER);
  SDB_GET_RESERVE(pRaw, dataPos, MND_SUBSCRIBE_RESERVE_SIZE, SUB_DECODE_OVER);

  if (tDecodeSubscribeObj(buf, pSub) == NULL) {
    goto SUB_DECODE_OVER;
  }

L
Liu Jicong 已提交
933 934
  terrno = TSDB_CODE_SUCCESS;

L
Liu Jicong 已提交
935
SUB_DECODE_OVER:
L
Liu Jicong 已提交
936
  tfree(buf);
L
Liu Jicong 已提交
937
  if (terrno != TSDB_CODE_SUCCESS) {
L
Liu Jicong 已提交
938 939 940 941 942 943 944 945 946 947 948 949 950 951 952
    mError("subscribe:%s, failed to decode from raw:%p since %s", pSub->key, pRaw, terrstr());
    tfree(pRow);
    return NULL;
  }

  return pRow;
}

static int32_t mndSubActionInsert(SSdb *pSdb, SMqSubscribeObj *pSub) {
  mTrace("subscribe:%s, perform insert action", pSub->key);
  return 0;
}

static int32_t mndSubActionDelete(SSdb *pSdb, SMqSubscribeObj *pSub) {
  mTrace("subscribe:%s, perform delete action", pSub->key);
L
Liu Jicong 已提交
953
  tDeleteSMqSubscribeObj(pSub);
L
Liu Jicong 已提交
954 955 956 957 958 959 960 961
  return 0;
}

static int32_t mndSubActionUpdate(SSdb *pSdb, SMqSubscribeObj *pOldSub, SMqSubscribeObj *pNewSub) {
  mTrace("subscribe:%s, perform update action", pOldSub->key);
  return 0;
}

L
Liu Jicong 已提交
962
static char *mndMakeSubscribeKey(const char *cgroup, const char *topicName) {
L
Liu Jicong 已提交
963 964 965 966
  char *key = malloc(TSDB_SHOW_SUBQUERY_LEN);
  if (key == NULL) {
    return NULL;
  }
S
Shengliang Guan 已提交
967
  int32_t tlen = strlen(cgroup);
L
Liu Jicong 已提交
968 969 970 971 972 973
  memcpy(key, cgroup, tlen);
  key[tlen] = ':';
  strcpy(key + tlen + 1, topicName);
  return key;
}

L
Liu Jicong 已提交
974
SMqSubscribeObj *mndAcquireSubscribe(SMnode *pMnode, const char *cgroup, const char *topicName) {
L
Liu Jicong 已提交
975 976 977 978 979
  SSdb            *pSdb = pMnode->pSdb;
  char            *key = mndMakeSubscribeKey(cgroup, topicName);
  SMqSubscribeObj *pSub = sdbAcquire(pSdb, SDB_SUBSCRIBE, key);
  free(key);
  if (pSub == NULL) {
980
    terrno = TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST;
L
Liu Jicong 已提交
981 982 983 984
  }
  return pSub;
}

L
Liu Jicong 已提交
985 986 987 988
SMqSubscribeObj *mndAcquireSubscribeByKey(SMnode *pMnode, const char *key) {
  SSdb            *pSdb = pMnode->pSdb;
  SMqSubscribeObj *pSub = sdbAcquire(pSdb, SDB_SUBSCRIBE, key);
  if (pSub == NULL) {
989
    terrno = TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST;
L
Liu Jicong 已提交
990 991 992 993
  }
  return pSub;
}

L
Liu Jicong 已提交
994 995 996 997 998 999 1000 1001 1002 1003 1004
void mndReleaseSubscribe(SMnode *pMnode, SMqSubscribeObj *pSub) {
  SSdb *pSdb = pMnode->pSdb;
  sdbRelease(pSdb, pSub);
}

static int32_t mndProcessSubscribeReq(SMnodeMsg *pMsg) {
  SMnode         *pMnode = pMsg->pMnode;
  char           *msgStr = pMsg->rpcMsg.pCont;
  SCMSubscribeReq subscribe;
  tDeserializeSCMSubscribeReq(msgStr, &subscribe);
  int64_t consumerId = subscribe.consumerId;
L
Liu Jicong 已提交
1005
  char   *cgroup = subscribe.consumerGroup;
L
Liu Jicong 已提交
1006 1007

  SArray *newSub = subscribe.topicNames;
S
Shengliang Guan 已提交
1008
  int32_t newTopicNum = subscribe.topicNum;
L
Liu Jicong 已提交
1009 1010 1011 1012

  taosArraySortString(newSub, taosArrayCompareString);

  SArray *oldSub = NULL;
S
Shengliang Guan 已提交
1013
  int32_t oldTopicNum = 0;
L
Liu Jicong 已提交
1014
  bool    createConsumer = false;
L
Liu Jicong 已提交
1015 1016 1017 1018
  // create consumer if not exist
  SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, consumerId);
  if (pConsumer == NULL) {
    // create consumer
L
Liu Jicong 已提交
1019 1020
    pConsumer = mndCreateConsumer(consumerId, cgroup);
    createConsumer = true;
L
Liu Jicong 已提交
1021
  } else {
L
Liu Jicong 已提交
1022
    pConsumer->epoch++;
L
Liu Jicong 已提交
1023
    oldSub = pConsumer->currentTopics;
L
Liu Jicong 已提交
1024
  }
L
Liu Jicong 已提交
1025
  pConsumer->currentTopics = newSub;
L
Liu Jicong 已提交
1026 1027 1028 1029 1030

  if (oldSub != NULL) {
    oldTopicNum = taosArrayGetSize(oldSub);
  }

S
Shengliang Guan 已提交
1031
  STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_TYPE_SUBSCRIBE, &pMsg->rpcMsg);
L
Liu Jicong 已提交
1032 1033 1034 1035 1036
  if (pTrans == NULL) {
    // TODO: free memory
    return -1;
  }

S
Shengliang Guan 已提交
1037
  int32_t i = 0, j = 0;
L
Liu Jicong 已提交
1038
  while (i < newTopicNum || j < oldTopicNum) {
L
Liu Jicong 已提交
1039 1040
    char *newTopicName = NULL;
    char *oldTopicName = NULL;
L
Liu Jicong 已提交
1041 1042
    if (i >= newTopicNum) {
      // encode unset topic msg to all vnodes related to that topic
L
Liu Jicong 已提交
1043
      oldTopicName = taosArrayGetP(oldSub, j);
L
Liu Jicong 已提交
1044 1045
      j++;
    } else if (j >= oldTopicNum) {
L
Liu Jicong 已提交
1046
      newTopicName = taosArrayGetP(newSub, i);
L
Liu Jicong 已提交
1047 1048
      i++;
    } else {
L
Liu Jicong 已提交
1049
      newTopicName = taosArrayGetP(newSub, i);
L
Liu Jicong 已提交
1050
      oldTopicName = taosArrayGetP(oldSub, j);
L
Liu Jicong 已提交
1051

S
Shengliang Guan 已提交
1052
      int32_t comp = compareLenPrefixedStr(newTopicName, oldTopicName);
L
Liu Jicong 已提交
1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068
      if (comp == 0) {
        // do nothing
        oldTopicName = newTopicName = NULL;
        i++;
        j++;
        continue;
      } else if (comp < 0) {
        oldTopicName = NULL;
        i++;
      } else {
        newTopicName = NULL;
        j++;
      }
    }

    if (oldTopicName != NULL) {
L
Liu Jicong 已提交
1069 1070 1071 1072 1073
      ASSERT(newTopicName == NULL);

      // cancel subscribe of old topic
      SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, cgroup, oldTopicName);
      ASSERT(pSub);
S
Shengliang Guan 已提交
1074 1075
      int32_t csz = taosArrayGetSize(pSub->consumers);
      for (int32_t ci = 0; ci < csz; ci++) {
L
Liu Jicong 已提交
1076 1077
        SMqSubConsumer *pSubConsumer = taosArrayGet(pSub->consumers, ci);
        if (pSubConsumer->consumerId == consumerId) {
S
Shengliang Guan 已提交
1078 1079
          int32_t vgsz = taosArrayGetSize(pSubConsumer->vgInfo);
          for (int32_t vgi = 0; vgi < vgsz; vgi++) {
L
Liu Jicong 已提交
1080 1081
            SMqConsumerEp *pConsumerEp = taosArrayGet(pSubConsumer->vgInfo, vgi);
            mndPersistCancelConnReq(pMnode, pTrans, pConsumerEp);
L
Liu Jicong 已提交
1082
            taosArrayPush(pSub->unassignedVg, pConsumerEp);
L
Liu Jicong 已提交
1083
          }
L
Liu Jicong 已提交
1084
          taosArrayRemove(pSub->consumers, ci);
L
Liu Jicong 已提交
1085
          break;
L
Liu Jicong 已提交
1086 1087
        }
      }
L
Liu Jicong 已提交
1088 1089
      char *oldTopicNameDup = strdup(oldTopicName);
      taosArrayPush(pConsumer->recentRemovedTopics, &oldTopicNameDup);
L
Liu Jicong 已提交
1090 1091
      atomic_store_32(&pConsumer->status, MQ_CONSUMER_STATUS__MODIFY);
      /*pSub->status = MQ_SUBSCRIBE_STATUS__DELETED;*/
L
Liu Jicong 已提交
1092 1093 1094 1095 1096
    } else if (newTopicName != NULL) {
      ASSERT(oldTopicName == NULL);

      SMqTopicObj *pTopic = mndAcquireTopic(pMnode, newTopicName);
      if (pTopic == NULL) {
L
Liu Jicong 已提交
1097
        mError("topic being subscribed not exist: %s", newTopicName);
L
Liu Jicong 已提交
1098 1099 1100
        continue;
      }

L
Liu Jicong 已提交
1101 1102
      SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, cgroup, newTopicName);
      bool             createSub = false;
L
Liu Jicong 已提交
1103
      if (pSub == NULL) {
L
Liu Jicong 已提交
1104 1105 1106
        mDebug("create new subscription by consumer %ld, group: %s, topic %s", consumerId, cgroup, newTopicName);
        pSub = mndCreateSubscription(pMnode, pTopic, cgroup);
        createSub = true;
L
Liu Jicong 已提交
1107 1108

        mndCreateOffset(pTrans, cgroup, newTopicName, pSub->unassignedVg);
L
Liu Jicong 已提交
1109
      }
L
Liu Jicong 已提交
1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121

      SMqSubConsumer mqSubConsumer;
      mqSubConsumer.consumerId = consumerId;
      mqSubConsumer.vgInfo = taosArrayInit(0, sizeof(SMqConsumerEp));
      taosArrayPush(pSub->consumers, &mqSubConsumer);

      // if have un assigned vg, assign one to the consumer
      if (taosArrayGetSize(pSub->unassignedVg) > 0) {
        SMqConsumerEp *pConsumerEp = taosArrayPop(pSub->unassignedVg);
        pConsumerEp->oldConsumerId = pConsumerEp->consumerId;
        pConsumerEp->consumerId = consumerId;
        taosArrayPush(mqSubConsumer.vgInfo, pConsumerEp);
1122
        if (pConsumerEp->oldConsumerId == -1) {
L
Liu Jicong 已提交
1123 1124
          mInfo("mq set conn: assign vgroup %d of topic %s to consumer %ld", pConsumerEp->vgId, newTopicName,
                pConsumerEp->consumerId);
1125 1126 1127 1128
          mndPersistMqSetConnReq(pMnode, pTrans, pTopic, cgroup, pConsumerEp);
        } else {
          mndPersistRebalanceMsg(pMnode, pTrans, pConsumerEp);
        }
L
Liu Jicong 已提交
1129
        // to trigger rebalance at once, do not set status active
1130
        /*atomic_store_32(&pConsumer->status, MQ_CONSUMER_STATUS__ACTIVE);*/
L
Liu Jicong 已提交
1131
      }
L
Liu Jicong 已提交
1132

L
Liu Jicong 已提交
1133
      SSdbRaw *pRaw = mndSubActionEncode(pSub);
L
Liu Jicong 已提交
1134
      sdbSetRawStatus(pRaw, SDB_STATUS_READY);
L
Liu Jicong 已提交
1135
      mndTransAppendRedolog(pTrans, pRaw);
L
Liu Jicong 已提交
1136

L
Liu Jicong 已提交
1137 1138
      if (!createSub) mndReleaseSubscribe(pMnode, pSub);
      mndReleaseTopic(pMnode, pTopic);
L
Liu Jicong 已提交
1139 1140 1141
    }
  }

L
Liu Jicong 已提交
1142
  if (oldSub) taosArrayDestroyEx(oldSub, free);
L
Liu Jicong 已提交
1143 1144 1145 1146 1147 1148 1149

  // persist consumerObj
  SSdbRaw *pConsumerRaw = mndConsumerActionEncode(pConsumer);
  sdbSetRawStatus(pConsumerRaw, SDB_STATUS_READY);
  mndTransAppendRedolog(pTrans, pConsumerRaw);

  if (mndTransPrepare(pMnode, pTrans) != 0) {
L
Liu Jicong 已提交
1150
    mError("mq-subscribe-trans:%d, failed to prepare since %s", pTrans->id, terrstr());
L
Liu Jicong 已提交
1151
    mndTransDrop(pTrans);
L
Liu Jicong 已提交
1152
    if (!createConsumer) mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
1153 1154 1155 1156
    return -1;
  }

  mndTransDrop(pTrans);
L
Liu Jicong 已提交
1157
  if (!createConsumer) mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
1158
  return TSDB_CODE_MND_ACTION_IN_PROGRESS;
L
Liu Jicong 已提交
1159 1160
}

L
Liu Jicong 已提交
1161 1162 1163 1164
static int32_t mndProcessSubscribeInternalRsp(SMnodeMsg *pRsp) {
  mndTransProcessRsp(pRsp);
  return 0;
}
L
Liu Jicong 已提交
1165 1166 1167 1168 1169

static void mndCancelGetNextConsumer(SMnode *pMnode, void *pIter) {
  SSdb *pSdb = pMnode->pSdb;
  sdbCancelFetch(pSdb, pIter);
}