mndConsumer.c 42.7 KB
Newer Older
L
Liu Jicong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
#include "mndConsumer.h"
L
Liu Jicong 已提交
18
#include "mndPrivilege.h"
L
Liu Jicong 已提交
19
#include "mndShow.h"
20
#include "mndSubscribe.h"
L
Liu Jicong 已提交
21 22
#include "mndTopic.h"
#include "mndTrans.h"
L
Liu Jicong 已提交
23
#include "tcompare.h"
L
Liu Jicong 已提交
24 25
#include "tname.h"

26
#define MND_CONSUMER_VER_NUMBER   2
L
Liu Jicong 已提交
27 28
#define MND_CONSUMER_RESERVE_SIZE 64

H
Haojun Liao 已提交
29
#define MND_CONSUMER_LOST_HB_CNT          6
L
Liu Jicong 已提交
30
#define MND_CONSUMER_LOST_CLEAR_THRESHOLD 43200
31

H
Haojun Liao 已提交
32
static int32_t mqRebInExecCnt = 0;
33

L
Liu Jicong 已提交
34 35
static const char *mndConsumerStatusName(int status);

L
Liu Jicong 已提交
36 37
static int32_t mndConsumerActionInsert(SSdb *pSdb, SMqConsumerObj *pConsumer);
static int32_t mndConsumerActionDelete(SSdb *pSdb, SMqConsumerObj *pConsumer);
38
static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, SMqConsumerObj *pNewConsumer);
S
Shengliang Guan 已提交
39
static int32_t mndProcessConsumerMetaMsg(SRpcMsg *pMsg);
40
static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows);
L
Liu Jicong 已提交
41
static void    mndCancelGetNextConsumer(SMnode *pMnode, void *pIter);
L
Liu Jicong 已提交
42

S
Shengliang Guan 已提交
43 44
static int32_t mndProcessSubscribeReq(SRpcMsg *pMsg);
static int32_t mndProcessAskEpReq(SRpcMsg *pMsg);
45
static int32_t mndProcessMqHbReq(SRpcMsg *pMsg);
S
Shengliang Guan 已提交
46 47
static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg);
static int32_t mndProcessConsumerLostMsg(SRpcMsg *pMsg);
L
Liu Jicong 已提交
48
static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg);
S
Shengliang Guan 已提交
49
static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg);
50

L
Liu Jicong 已提交
51
int32_t mndInitConsumer(SMnode *pMnode) {
S
Shengliang Guan 已提交
52 53 54 55 56 57 58 59 60
  SSdbTable table = {
      .sdbType = SDB_CONSUMER,
      .keyType = SDB_KEY_INT64,
      .encodeFp = (SdbEncodeFp)mndConsumerActionEncode,
      .decodeFp = (SdbDecodeFp)mndConsumerActionDecode,
      .insertFp = (SdbInsertFp)mndConsumerActionInsert,
      .updateFp = (SdbUpdateFp)mndConsumerActionUpdate,
      .deleteFp = (SdbDeleteFp)mndConsumerActionDelete,
  };
L
Liu Jicong 已提交
61

L
Liu Jicong 已提交
62 63 64
  mndSetMsgHandle(pMnode, TDMT_MND_TMQ_SUBSCRIBE, mndProcessSubscribeReq);
  mndSetMsgHandle(pMnode, TDMT_MND_TMQ_HB, mndProcessMqHbReq);
  mndSetMsgHandle(pMnode, TDMT_MND_TMQ_ASK_EP, mndProcessAskEpReq);
L
Liu Jicong 已提交
65
  mndSetMsgHandle(pMnode, TDMT_MND_TMQ_TIMER, mndProcessMqTimerMsg);
L
Liu Jicong 已提交
66 67
  mndSetMsgHandle(pMnode, TDMT_MND_TMQ_CONSUMER_LOST, mndProcessConsumerLostMsg);
  mndSetMsgHandle(pMnode, TDMT_MND_TMQ_CONSUMER_RECOVER, mndProcessConsumerRecoverMsg);
L
Liu Jicong 已提交
68
  mndSetMsgHandle(pMnode, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, mndProcessConsumerClearMsg);
L
Liu Jicong 已提交
69 70 71 72

  mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_CONSUMERS, mndRetrieveConsumer);
  mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_CONSUMERS, mndCancelGetNextConsumer);

L
Liu Jicong 已提交
73 74 75 76 77
  return sdbSetTable(pMnode->pSdb, table);
}

void mndCleanupConsumer(SMnode *pMnode) {}

L
Liu Jicong 已提交
78
bool mndRebTryStart() {
H
Haojun Liao 已提交
79
  int32_t old = atomic_val_compare_exchange_32(&mqRebInExecCnt, 0, 1);
C
cadem 已提交
80
  mDebug("tq timer, rebalance counter old val:%d", old);
L
Liu Jicong 已提交
81 82 83
  return old == 0;
}

X
Xiaoyu Wang 已提交
84
void mndRebEnd() { mndRebCntDec(); }
L
Liu Jicong 已提交
85

S
Shengliang Guan 已提交
86 87
void mndRebCntInc() {
  int32_t val = atomic_add_fetch_32(&mqRebInExecCnt, 1);
88
  mInfo("rebalance trans start, rebalance counter:%d", val);
S
Shengliang Guan 已提交
89
}
L
Liu Jicong 已提交
90

S
Shengliang Guan 已提交
91
void mndRebCntDec() {
92 93 94 95 96 97 98 99 100 101
  while (1) {
    int32_t val = atomic_load_32(&mqRebInExecCnt);
    if (val <= 0) {
      mError("rebalance trans end, rebalance counter:%d should not be less equalled than 0, ignore counter desc", val);
      break;
    }

    int32_t newVal = val - 1;
    int32_t oldVal = atomic_val_compare_exchange_32(&mqRebInExecCnt, val, newVal);
    if (oldVal == val) {
C
cadem 已提交
102
      mDebug("rebalance trans end, rebalance counter:%d", newVal);
103 104 105
      break;
    }
  }
S
Shengliang Guan 已提交
106
}
L
Liu Jicong 已提交
107

S
Shengliang Guan 已提交
108 109 110
static int32_t mndProcessConsumerLostMsg(SRpcMsg *pMsg) {
  SMnode             *pMnode = pMsg->info.node;
  SMqConsumerLostMsg *pLostMsg = pMsg->pCont;
111
  SMqConsumerObj     *pConsumer = mndAcquireConsumer(pMnode, pLostMsg->consumerId);
L
Liu Jicong 已提交
112 113 114
  if (pConsumer == NULL) {
    return 0;
  }
115

X
Xiaoyu Wang 已提交
116
  mInfo("process consumer lost msg, consumer:0x%" PRIx64 " status:%d(%s)", pLostMsg->consumerId, pConsumer->status,
L
Liu Jicong 已提交
117 118
        mndConsumerStatusName(pConsumer->status));

D
dapan1121 已提交
119 120 121 122 123
  if (pConsumer->status != MQ_CONSUMER_STATUS__READY) {
    mndReleaseConsumer(pMnode, pConsumer);
    return -1;
  }

124 125 126 127 128
  SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumer->consumerId, pConsumer->cgroup);
  pConsumerNew->updateType = CONSUMER_UPDATE__LOST;

  mndReleaseConsumer(pMnode, pConsumer);

129
  STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_NOTHING, pMsg, "lost-csm");
H
Haojun Liao 已提交
130 131 132 133 134 135 136 137 138 139 140
  if (pTrans == NULL) {
    goto FAIL;
  }

  if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) {
    goto FAIL;
  }

  if (mndTransPrepare(pMnode, pTrans) != 0) {
    goto FAIL;
  }
141

L
Liu Jicong 已提交
142
  tDeleteSMqConsumerObj(pConsumerNew);
143 144
  taosMemoryFree(pConsumerNew);
  mndTransDrop(pTrans);
145 146
  return 0;
FAIL:
L
Liu Jicong 已提交
147
  tDeleteSMqConsumerObj(pConsumerNew);
148
  taosMemoryFree(pConsumerNew);
149 150 151 152
  mndTransDrop(pTrans);
  return -1;
}

S
Shengliang Guan 已提交
153 154 155
static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg) {
  SMnode                *pMnode = pMsg->info.node;
  SMqConsumerRecoverMsg *pRecoverMsg = pMsg->pCont;
L
Liu Jicong 已提交
156
  SMqConsumerObj        *pConsumer = mndAcquireConsumer(pMnode, pRecoverMsg->consumerId);
L
Liu Jicong 已提交
157 158 159 160
  if (pConsumer == NULL) {
    mError("cannot find consumer %" PRId64 " when processing consumer recover msg", pRecoverMsg->consumerId);
    return -1;
  }
L
Liu Jicong 已提交
161

H
Haojun Liao 已提交
162 163
  mInfo("receive consumer recover msg, consumer:0x%" PRIx64 " status:%d(%s)", pRecoverMsg->consumerId,
        pConsumer->status, mndConsumerStatusName(pConsumer->status));
L
Liu Jicong 已提交
164

L
Liu Jicong 已提交
165
  if (pConsumer->status != MQ_CONSUMER_STATUS__LOST_REBD) {
L
Liu Jicong 已提交
166
    mndReleaseConsumer(pMnode, pConsumer);
L
Liu Jicong 已提交
167
    terrno = TSDB_CODE_MND_CONSUMER_NOT_READY;
L
Liu Jicong 已提交
168 169 170
    return -1;
  }

L
Liu Jicong 已提交
171 172 173 174 175
  SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumer->consumerId, pConsumer->cgroup);
  pConsumerNew->updateType = CONSUMER_UPDATE__RECOVER;

  mndReleaseConsumer(pMnode, pConsumer);

176
  STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_NOTHING, pMsg, "recover-csm");
H
Haojun Liao 已提交
177 178 179 180
  if (pTrans == NULL) {
    goto FAIL;
  }

L
Liu Jicong 已提交
181 182 183
  if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) goto FAIL;
  if (mndTransPrepare(pMnode, pTrans) != 0) goto FAIL;

L
Liu Jicong 已提交
184 185
  tDeleteSMqConsumerObj(pConsumerNew);
  taosMemoryFree(pConsumerNew);
L
Liu Jicong 已提交
186 187 188 189
  mndTransDrop(pTrans);
  return 0;
FAIL:
  tDeleteSMqConsumerObj(pConsumerNew);
L
Liu Jicong 已提交
190
  taosMemoryFree(pConsumerNew);
L
Liu Jicong 已提交
191 192 193 194
  mndTransDrop(pTrans);
  return -1;
}

195
// todo check the clear process
L
Liu Jicong 已提交
196 197 198
static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg) {
  SMnode              *pMnode = pMsg->info.node;
  SMqConsumerClearMsg *pClearMsg = pMsg->pCont;
H
Haojun Liao 已提交
199 200

  SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, pClearMsg->consumerId);
L
Liu Jicong 已提交
201
  if (pConsumer == NULL) {
H
Haojun Liao 已提交
202
    mError("consumer:0x%"PRIx64" failed to be found to clear it", pClearMsg->consumerId);
L
Liu Jicong 已提交
203 204 205
    return 0;
  }

H
Haojun Liao 已提交
206
  mInfo("consumer:0x%" PRIx64 " needs to be cleared, status %s", pClearMsg->consumerId,
L
Liu Jicong 已提交
207 208 209 210 211 212 213 214 215 216 217 218 219 220
        mndConsumerStatusName(pConsumer->status));

  if (pConsumer->status != MQ_CONSUMER_STATUS__LOST_REBD) {
    mndReleaseConsumer(pMnode, pConsumer);
    return -1;
  }

  SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumer->consumerId, pConsumer->cgroup);
  pConsumerNew->updateType = CONSUMER_UPDATE__LOST;

  mndReleaseConsumer(pMnode, pConsumer);

  STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_NOTHING, pMsg, "clear-csm");
  if (pTrans == NULL) goto FAIL;
H
Haojun Liao 已提交
221 222

  // this is the drop action, not the update action
L
Liu Jicong 已提交
223 224 225 226 227 228 229
  if (mndSetConsumerDropLogs(pMnode, pTrans, pConsumerNew) != 0) goto FAIL;
  if (mndTransPrepare(pMnode, pTrans) != 0) goto FAIL;

  tDeleteSMqConsumerObj(pConsumerNew);
  taosMemoryFree(pConsumerNew);
  mndTransDrop(pTrans);
  return 0;
230

L
Liu Jicong 已提交
231 232 233 234 235 236 237
FAIL:
  tDeleteSMqConsumerObj(pConsumerNew);
  taosMemoryFree(pConsumerNew);
  mndTransDrop(pTrans);
  return -1;
}

L
Liu Jicong 已提交
238
static SMqRebInfo *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) {
L
Liu Jicong 已提交
239 240 241 242
  SMqRebInfo *pRebInfo = taosHashGet(pHash, key, strlen(key) + 1);
  if (pRebInfo == NULL) {
    pRebInfo = tNewSMqRebSubscribe(key);
    if (pRebInfo == NULL) {
243 244 245
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      return NULL;
    }
L
Liu Jicong 已提交
246 247 248
    taosHashPut(pHash, key, strlen(key) + 1, pRebInfo, sizeof(SMqRebInfo));
    taosMemoryFree(pRebInfo);
    pRebInfo = taosHashGet(pHash, key, strlen(key) + 1);
249
  }
L
Liu Jicong 已提交
250
  return pRebInfo;
251 252
}

X
Xiaoyu Wang 已提交
253 254
static void freeRebalanceItem(void *param) {
  SMqRebInfo *pInfo = param;
H
Haojun Liao 已提交
255 256 257 258
  taosArrayDestroy(pInfo->newConsumers);
  taosArrayDestroy(pInfo->removedConsumers);
}

S
Shengliang Guan 已提交
259 260
static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) {
  SMnode         *pMnode = pMsg->info.node;
261 262 263 264
  SSdb           *pSdb = pMnode->pSdb;
  SMqConsumerObj *pConsumer;
  void           *pIter = NULL;

D
dapan1121 已提交
265
  mDebug("start to process mq timer");
266

267
  // rebalance cannot be parallel
L
Liu Jicong 已提交
268
  if (!mndRebTryStart()) {
D
dapan1121 已提交
269
    mDebug("mq rebalance already in progress, do nothing");
270 271 272 273
    return 0;
  }

  SMqDoRebalanceMsg *pRebMsg = rpcMallocCont(sizeof(SMqDoRebalanceMsg));
H
Haojun Liao 已提交
274
  if (pRebMsg == NULL) {
X
Xiaoyu Wang 已提交
275
    mError("failed to create the rebalance msg, size:%d, quit mq timer", (int32_t)sizeof(SMqDoRebalanceMsg));
H
Haojun Liao 已提交
276 277 278 279
    mndRebEnd();
    return TSDB_CODE_OUT_OF_MEMORY;
  }

280
  pRebMsg->rebSubHash = taosHashInit(64, MurmurHash3_32, true, HASH_NO_LOCK);
H
Haojun Liao 已提交
281 282 283 284 285 286 287 288
  if (pRebMsg->rebSubHash == NULL) {
    mError("failed to create rebalance hashmap");
    rpcFreeCont(pRebMsg);
    mndRebEnd();
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  taosHashSetFreeFp(pRebMsg->rebSubHash, freeRebalanceItem);
289 290 291 292

  // iterate all consumers, find all modification
  while (1) {
    pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer);
293 294 295
    if (pIter == NULL) {
      break;
    }
296 297 298

    int32_t hbStatus = atomic_add_fetch_32(&pConsumer->hbStatus, 1);
    int32_t status = atomic_load_32(&pConsumer->status);
H
Haojun Liao 已提交
299

X
Xiaoyu Wang 已提交
300 301 302
    mDebug("check for consumer:0x%" PRIx64 " status:%d(%s), sub-time:%" PRId64 ", uptime:%" PRId64 ", hbstatus:%d",
           pConsumer->consumerId, status, mndConsumerStatusName(status), pConsumer->subscribeTime, pConsumer->upTime,
           hbStatus);
L
Liu Jicong 已提交
303 304

    if (status == MQ_CONSUMER_STATUS__READY) {
H
Haojun Liao 已提交
305 306
      if (hbStatus > MND_CONSUMER_LOST_HB_CNT) {
        SMqConsumerLostMsg *pLostMsg = rpcMallocCont(sizeof(SMqConsumerLostMsg));
H
Haojun Liao 已提交
307 308
        if (pLostMsg == NULL) {
          mError("consumer:0x%"PRIx64" failed to transfer consumer status to lost due to out of memory. alloc size:%d",
H
Haojun Liao 已提交
309
              pConsumer->consumerId, (int32_t)sizeof(SMqConsumerLostMsg));
H
Haojun Liao 已提交
310 311
          continue;
        }
H
Haojun Liao 已提交
312 313 314

        pLostMsg->consumerId = pConsumer->consumerId;
        SRpcMsg rpcMsg = {
315
            .msgType = TDMT_MND_TMQ_CONSUMER_LOST, .pCont = pLostMsg, .contLen = sizeof(SMqConsumerLostMsg)};
316

317 318
        mDebug("consumer:0x%"PRIx64" hb not received beyond threshold %d, set to lost", pConsumer->consumerId,
            MND_CONSUMER_LOST_HB_CNT);
H
Haojun Liao 已提交
319 320
        tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
      }
L
Liu Jicong 已提交
321
    } else if (status == MQ_CONSUMER_STATUS__LOST_REBD) {
H
Haojun Liao 已提交
322
      // if the client is lost longer than one day, clear it. Otherwise, do nothing about the lost consumers.
L
Liu Jicong 已提交
323 324
      if (hbStatus > MND_CONSUMER_LOST_CLEAR_THRESHOLD) {
        SMqConsumerClearMsg *pClearMsg = rpcMallocCont(sizeof(SMqConsumerClearMsg));
H
Haojun Liao 已提交
325 326
        if (pClearMsg == NULL) {
          mError("consumer:0x%"PRIx64" failed to clear consumer due to out of memory. alloc size:%d",
H
Haojun Liao 已提交
327
                 pConsumer->consumerId, (int32_t)sizeof(SMqConsumerClearMsg));
H
Haojun Liao 已提交
328 329
          continue;
        }
L
Liu Jicong 已提交
330 331 332

        pClearMsg->consumerId = pConsumer->consumerId;
        SRpcMsg rpcMsg = {
333
            .msgType = TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, .pCont = pClearMsg, .contLen = sizeof(SMqConsumerClearMsg)};
334

335 336
        mDebug("consumer:0x%" PRIx64 " lost beyond threshold %d, clear it", pConsumer->consumerId,
               MND_CONSUMER_LOST_CLEAR_THRESHOLD);
L
Liu Jicong 已提交
337 338
        tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
      }
339
    } else if (status == MQ_CONSUMER_STATUS__LOST) {
L
Liu Jicong 已提交
340
      taosRLockLatch(&pConsumer->lock);
341 342 343 344 345
      int32_t topicNum = taosArrayGetSize(pConsumer->currentTopics);
      for (int32_t i = 0; i < topicNum; i++) {
        char  key[TSDB_SUBSCRIBE_KEY_LEN];
        char *removedTopic = taosArrayGetP(pConsumer->currentTopics, i);
        mndMakeSubscribeKey(key, pConsumer->cgroup, removedTopic);
L
Liu Jicong 已提交
346
        SMqRebInfo *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
347 348
        taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
      }
L
Liu Jicong 已提交
349
      taosRUnLockLatch(&pConsumer->lock);
350
    } else {  // MQ_CONSUMER_STATUS_REBALANCE
L
Liu Jicong 已提交
351
      taosRLockLatch(&pConsumer->lock);
352

353 354 355 356 357
      int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics);
      for (int32_t i = 0; i < newTopicNum; i++) {
        char  key[TSDB_SUBSCRIBE_KEY_LEN];
        char *newTopic = taosArrayGetP(pConsumer->rebNewTopics, i);
        mndMakeSubscribeKey(key, pConsumer->cgroup, newTopic);
L
Liu Jicong 已提交
358
        SMqRebInfo *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
359 360 361 362 363 364 365 366
        taosArrayPush(pRebSub->newConsumers, &pConsumer->consumerId);
      }

      int32_t removedTopicNum = taosArrayGetSize(pConsumer->rebRemovedTopics);
      for (int32_t i = 0; i < removedTopicNum; i++) {
        char  key[TSDB_SUBSCRIBE_KEY_LEN];
        char *removedTopic = taosArrayGetP(pConsumer->rebRemovedTopics, i);
        mndMakeSubscribeKey(key, pConsumer->cgroup, removedTopic);
L
Liu Jicong 已提交
367
        SMqRebInfo *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
368 369
        taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
      }
L
Liu Jicong 已提交
370
      taosRUnLockLatch(&pConsumer->lock);
371 372 373 374 375 376 377 378
    }

    mndReleaseConsumer(pMnode, pConsumer);
  }

  if (taosHashGetSize(pRebMsg->rebSubHash) != 0) {
    mInfo("mq rebalance will be triggered");
    SRpcMsg rpcMsg = {
L
Liu Jicong 已提交
379
        .msgType = TDMT_MND_TMQ_DO_REBALANCE,
380 381 382 383 384 385 386
        .pCont = pRebMsg,
        .contLen = sizeof(SMqDoRebalanceMsg),
    };
    tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
  } else {
    taosHashCleanup(pRebMsg->rebSubHash);
    rpcFreeCont(pRebMsg);
H
Haojun Liao 已提交
387
    mDebug("mq timer finished, no need to re-balance");
L
Liu Jicong 已提交
388
    mndRebEnd();
389 390 391 392
  }
  return 0;
}

393
static int32_t mndProcessMqHbReq(SRpcMsg *pMsg) {
394
  int32_t code = 0;
L
Liu Jicong 已提交
395 396
  SMnode  *pMnode = pMsg->info.node;
  SMqHbReq req = {0};
397

398
  if ((code = tDeserializeSMqHbReq(pMsg->pCont, pMsg->contLen, &req)) < 0) {
D
dapan1121 已提交
399
    terrno = TSDB_CODE_OUT_OF_MEMORY;
400
    goto end;
D
dapan1121 已提交
401 402
  }

L
Liu Jicong 已提交
403
  int64_t         consumerId = req.consumerId;
404
  SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, consumerId);
L
Liu Jicong 已提交
405
  if (pConsumer == NULL) {
X
Xiaoyu Wang 已提交
406
    mError("consumer:0x%" PRIx64 " not exist", consumerId);
L
Liu Jicong 已提交
407
    terrno = TSDB_CODE_MND_CONSUMER_NOT_EXIST;
408 409
    code = -1;
    goto end;
L
Liu Jicong 已提交
410
  }
411 412 413 414 415 416

  atomic_store_32(&pConsumer->hbStatus, 0);

  int32_t status = atomic_load_32(&pConsumer->status);

  if (status == MQ_CONSUMER_STATUS__LOST_REBD) {
X
Xiaoyu Wang 已提交
417
    mInfo("try to recover consumer:0x%" PRIx64 "", consumerId);
418 419 420
    SMqConsumerRecoverMsg *pRecoverMsg = rpcMallocCont(sizeof(SMqConsumerRecoverMsg));

    pRecoverMsg->consumerId = consumerId;
421
    SRpcMsg pRpcMsg = {
L
Liu Jicong 已提交
422
        .msgType = TDMT_MND_TMQ_CONSUMER_RECOVER,
423 424 425 426
        .pCont = pRecoverMsg,
        .contLen = sizeof(SMqConsumerRecoverMsg),
    };
    tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &pRpcMsg);
427 428
  }

429 430 431 432 433 434 435
  for(int i = 0; i < taosArrayGetSize(req.topics); i++){
    TopicOffsetRows* data = taosArrayGet(req.topics, i);
    mDebug("heartbeat report offset rows.%s:%s", pConsumer->cgroup, data->topicName);

    SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, data->topicName);
    taosRLockLatch(&pSub->lock);
    SMqConsumerEp *pConsumerEp = taosHashGet(pSub->consumerHash, &consumerId, sizeof(int64_t));
436 437 438 439
    if(pConsumerEp){
      pConsumerEp->offsetRows = data->offsetRows;
      data->offsetRows = NULL;
    }
440 441 442 443 444
    taosRUnLockLatch(&pSub->lock);

    mndReleaseSubscribe(pMnode, pSub);
  }

445 446
  mndReleaseConsumer(pMnode, pConsumer);

447 448 449
end:
  tDeatroySMqHbReq(&req);
  return code;
450 451
}

S
Shengliang Guan 已提交
452
static int32_t mndProcessAskEpReq(SRpcMsg *pMsg) {
L
Liu Jicong 已提交
453 454 455
  SMnode     *pMnode = pMsg->info.node;
  SMqAskEpReq req = {0};
  SMqAskEpRsp rsp = {0};
D
dapan1121 已提交
456 457 458 459 460 461

  if (tDeserializeSMqAskEpReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

L
Liu Jicong 已提交
462 463
  int64_t consumerId = req.consumerId;
  int32_t epoch = req.epoch;
464

465
  SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, consumerId);
L
Liu Jicong 已提交
466
  if (pConsumer == NULL) {
H
Haojun Liao 已提交
467
    mError("consumer:0x%" PRIx64 " group:%s not exists in sdb", consumerId, req.cgroup);
468 469 470 471
    terrno = TSDB_CODE_MND_CONSUMER_NOT_EXIST;
    return -1;
  }

H
Haojun Liao 已提交
472 473 474 475 476 477 478
  int32_t ret = strncmp(req.cgroup, pConsumer->cgroup, tListLen(pConsumer->cgroup));
  if (ret != 0) {
    mError("consumer:0x%" PRIx64 " group:%s not consistent with data in sdb, saved cgroup:%s", consumerId, req.cgroup,
           pConsumer->cgroup);
    terrno = TSDB_CODE_MND_CONSUMER_NOT_EXIST;
    return -1;
  }
479

480 481 482 483
  atomic_store_32(&pConsumer->hbStatus, 0);

  // 1. check consumer status
  int32_t status = atomic_load_32(&pConsumer->status);
L
Liu Jicong 已提交
484

L
Liu Jicong 已提交
485
  if (status == MQ_CONSUMER_STATUS__LOST_REBD) {
X
Xiaoyu Wang 已提交
486
    mInfo("try to recover consumer:0x%" PRIx64, consumerId);
L
Liu Jicong 已提交
487 488 489
    SMqConsumerRecoverMsg *pRecoverMsg = rpcMallocCont(sizeof(SMqConsumerRecoverMsg));

    pRecoverMsg->consumerId = consumerId;
490
    SRpcMsg pRpcMsg = {
L
Liu Jicong 已提交
491
        .msgType = TDMT_MND_TMQ_CONSUMER_RECOVER,
492 493 494
        .pCont = pRecoverMsg,
        .contLen = sizeof(SMqConsumerRecoverMsg),
    };
H
Haojun Liao 已提交
495

496
    tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &pRpcMsg);
497 498 499
  }

  if (status != MQ_CONSUMER_STATUS__READY) {
X
Xiaoyu Wang 已提交
500
    mInfo("consumer:0x%" PRIx64 " not ready, status: %s", consumerId, mndConsumerStatusName(status));
501 502 503 504 505 506
    terrno = TSDB_CODE_MND_CONSUMER_NOT_READY;
    return -1;
  }

  int32_t serverEpoch = atomic_load_32(&pConsumer->epoch);

507
  // 2. check epoch, only send ep info when epochs do not match
508 509
  if (epoch != serverEpoch) {
    taosRLockLatch(&pConsumer->lock);
X
Xiaoyu Wang 已提交
510 511
    mInfo("process ask ep, consumer:0x%" PRIx64 "(epoch %d) update with server epoch %d", consumerId, epoch,
          serverEpoch);
512 513 514 515 516
    int32_t numOfTopics = taosArrayGetSize(pConsumer->currentTopics);

    rsp.topics = taosArrayInit(numOfTopics, sizeof(SMqSubTopicEp));
    if (rsp.topics == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
L
Liu Jicong 已提交
517
      taosRUnLockLatch(&pConsumer->lock);
518 519 520
      goto FAIL;
    }

H
Haojun Liao 已提交
521
    // handle all topics subscribed by this consumer
522 523 524 525
    for (int32_t i = 0; i < numOfTopics; i++) {
      char            *topic = taosArrayGetP(pConsumer->currentTopics, i);
      SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, topic);
      // txn guarantees pSub is created
L
Liu Jicong 已提交
526

527 528 529 530 531 532 533 534
      taosRLockLatch(&pSub->lock);

      SMqSubTopicEp topicEp = {0};
      strcpy(topicEp.topic, topic);

      // 2.1 fetch topic schema
      SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic);
      taosRLockLatch(&pTopic->lock);
L
Liu Jicong 已提交
535
      tstrncpy(topicEp.db, pTopic->db, TSDB_DB_FNAME_LEN);
536
      topicEp.schema.nCols = pTopic->schema.nCols;
L
Liu Jicong 已提交
537 538 539 540
      if (topicEp.schema.nCols) {
        topicEp.schema.pSchema = taosMemoryCalloc(topicEp.schema.nCols, sizeof(SSchema));
        memcpy(topicEp.schema.pSchema, pTopic->schema.pSchema, topicEp.schema.nCols * sizeof(SSchema));
      }
541 542 543 544
      taosRUnLockLatch(&pTopic->lock);
      mndReleaseTopic(pMnode, pTopic);

      // 2.2 iterate all vg assigned to the consumer of that topic
545 546
      SMqConsumerEp *pConsumerEp = taosHashGet(pSub->consumerHash, &consumerId, sizeof(int64_t));
      int32_t        vgNum = taosArrayGetSize(pConsumerEp->vgs);
547

H
Haojun Liao 已提交
548
      // this customer assigned vgroups
549 550 551
      topicEp.vgs = taosArrayInit(vgNum, sizeof(SMqSubVgEp));
      if (topicEp.vgs == NULL) {
        terrno = TSDB_CODE_OUT_OF_MEMORY;
L
Liu Jicong 已提交
552
        taosRUnLockLatch(&pConsumer->lock);
L
Liu Jicong 已提交
553 554
        taosRUnLockLatch(&pSub->lock);
        mndReleaseSubscribe(pMnode, pSub);
555 556 557 558
        goto FAIL;
      }

      for (int32_t j = 0; j < vgNum; j++) {
559
        SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j);
560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577
        char     offsetKey[TSDB_PARTITION_KEY_LEN];
        mndMakePartitionKey(offsetKey, pConsumer->cgroup, topic, pVgEp->vgId);
        // 2.2.1 build vg ep
        SMqSubVgEp vgEp = {
            .epSet = pVgEp->epSet,
            .vgId = pVgEp->vgId,
            .offset = -1,
        };

        taosArrayPush(topicEp.vgs, &vgEp);
      }
      taosArrayPush(rsp.topics, &topicEp);

      taosRUnLockLatch(&pSub->lock);
      mndReleaseSubscribe(pMnode, pSub);
    }
    taosRUnLockLatch(&pConsumer->lock);
  }
H
Haojun Liao 已提交
578

579
  // encode rsp
L
Liu Jicong 已提交
580
  int32_t tlen = sizeof(SMqRspHead) + tEncodeSMqAskEpRsp(NULL, &rsp);
581 582
  void   *buf = rpcMallocCont(tlen);
  if (buf == NULL) {
L
Liu Jicong 已提交
583
    terrno = TSDB_CODE_OUT_OF_MEMORY;
584
    return -1;
L
Liu Jicong 已提交
585
  }
H
Haojun Liao 已提交
586

587 588 589 590 591 592 593 594
  SMqRspHead* pHead = buf;

  pHead->mqMsgType = TMQ_MSG_TYPE__EP_RSP;
  pHead->epoch = serverEpoch;
  pHead->consumerId = pConsumer->consumerId;
  pHead->walsver = 0;
  pHead->walever = 0;

S
Shengliang Guan 已提交
595

596
  void *abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));
L
Liu Jicong 已提交
597
  tEncodeSMqAskEpRsp(&abuf, &rsp);
598 599

  // release consumer and free memory
L
Liu Jicong 已提交
600
  tDeleteSMqAskEpRsp(&rsp);
601 602 603
  mndReleaseConsumer(pMnode, pConsumer);

  // send rsp
S
Shengliang Guan 已提交
604 605
  pMsg->info.rsp = buf;
  pMsg->info.rspLen = tlen;
606
  return 0;
H
Haojun Liao 已提交
607

608
FAIL:
L
Liu Jicong 已提交
609
  tDeleteSMqAskEpRsp(&rsp);
610 611 612 613
  mndReleaseConsumer(pMnode, pConsumer);
  return -1;
}

L
Liu Jicong 已提交
614 615 616 617 618 619 620 621
int32_t mndSetConsumerDropLogs(SMnode *pMnode, STrans *pTrans, SMqConsumerObj *pConsumer) {
  SSdbRaw *pCommitRaw = mndConsumerActionEncode(pConsumer);
  if (pCommitRaw == NULL) return -1;
  if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) return -1;
  if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED) != 0) return -1;
  return 0;
}

622 623 624 625 626 627 628 629
int32_t mndSetConsumerCommitLogs(SMnode *pMnode, STrans *pTrans, SMqConsumerObj *pConsumer) {
  SSdbRaw *pCommitRaw = mndConsumerActionEncode(pConsumer);
  if (pCommitRaw == NULL) return -1;
  if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) return -1;
  if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) return -1;
  return 0;
}

X
Xiaoyu Wang 已提交
630
static int32_t validateTopics(const SArray *pTopicList, SMnode *pMnode, const char *pUser) {
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650
  int32_t numOfTopics = taosArrayGetSize(pTopicList);

  for (int32_t i = 0; i < numOfTopics; i++) {
    char        *pOneTopic = taosArrayGetP(pTopicList, i);
    SMqTopicObj *pTopic = mndAcquireTopic(pMnode, pOneTopic);
    if (pTopic == NULL) {  // terrno has been set by callee function
      return -1;
    }

    if (mndCheckTopicPrivilege(pMnode, pUser, MND_OPER_SUBSCRIBE, pTopic) != 0) {
      mndReleaseTopic(pMnode, pTopic);
      return -1;
    }

    mndReleaseTopic(pMnode, pTopic);
  }

  return 0;
}

X
Xiaoyu Wang 已提交
651
static void *topicNameDup(void *p) { return taosStrdup((char *)p); }
652

X
Xiaoyu Wang 已提交
653 654
static void freeItem(void *param) {
  void *pItem = *(void **)param;
655 656 657
  if (pItem != NULL) {
    taosMemoryFree(pItem);
  }
658 659
}

660 661 662 663
int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) {
  SMnode *pMnode = pMsg->info.node;
  char   *msgStr = pMsg->pCont;

664 665
  SCMSubscribeReq subscribe = {0};
  tDeserializeSCMSubscribeReq(msgStr, &subscribe);
666 667

  uint64_t        consumerId = subscribe.consumerId;
668
  char           *cgroup = subscribe.cgroup;
H
Haojun Liao 已提交
669
  SMqConsumerObj *pExistedConsumer = NULL;
670 671 672
  SMqConsumerObj *pConsumerNew = NULL;

  int32_t code = -1;
673 674
  SArray *pTopicList = subscribe.topicNames;
  taosArraySort(pTopicList, taosArrayCompareString);
675
  taosArrayRemoveDuplicate(pTopicList, taosArrayCompareString, freeItem);
676

677
  int32_t newTopicNum = taosArrayGetSize(pTopicList);
678

H
Haojun Liao 已提交
679
  // check topic existence
680
  STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_NOTHING, pMsg, "subscribe");
681 682 683
  if (pTrans == NULL) {
    goto _over;
  }
L
Liu Jicong 已提交
684

685 686 687
  code = validateTopics(pTopicList, pMnode, pMsg->info.conn.user);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
688 689
  }

H
Haojun Liao 已提交
690 691
  pExistedConsumer = mndAcquireConsumer(pMnode, consumerId);
  if (pExistedConsumer == NULL) {
X
Xiaoyu Wang 已提交
692 693
    mInfo("receive subscribe request from new consumer:0x%" PRIx64 " cgroup:%s, numOfTopics:%d", consumerId,
          subscribe.cgroup, (int32_t)taosArrayGetSize(pTopicList));
L
Liu Jicong 已提交
694

695
    pConsumerNew = tNewSMqConsumerObj(consumerId, cgroup);
696
    tstrncpy(pConsumerNew->clientId, subscribe.clientId, tListLen(pConsumerNew->clientId));
697

698 699 700 701 702 703
    pConsumerNew->withTbName = subscribe.withTbName;
    pConsumerNew->useSnapshot = subscribe.useSnapshot;
    pConsumerNew->autoCommit = subscribe.autoCommit;
    pConsumerNew->autoCommitInterval = subscribe.autoCommitInterval;
    pConsumerNew->resetOffsetCfg = subscribe.resetOffsetCfg;

704
    // set the update type
705
    pConsumerNew->updateType = CONSUMER_UPDATE__REBALANCE;
H
Haojun Liao 已提交
706
    taosArrayDestroy(pConsumerNew->assignedTopics);
707 708 709
    pConsumerNew->assignedTopics = taosArrayDup(pTopicList, topicNameDup);

    // all subscribed topics should re-balance.
L
Liu Jicong 已提交
710
    taosArrayDestroy(pConsumerNew->rebNewTopics);
711
    pConsumerNew->rebNewTopics = pTopicList;
712 713
    subscribe.topicNames = NULL;

714 715
    if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) goto _over;
    if (mndTransPrepare(pMnode, pTrans) != 0) goto _over;
716 717

  } else {
H
Haojun Liao 已提交
718
    int32_t status = atomic_load_32(&pExistedConsumer->status);
L
Liu Jicong 已提交
719

X
Xiaoyu Wang 已提交
720 721 722
    mInfo("receive subscribe request from existed consumer:0x%" PRIx64
          " cgroup:%s, current status:%d(%s), subscribe topic num: %d",
          consumerId, subscribe.cgroup, status, mndConsumerStatusName(status), newTopicNum);
L
Liu Jicong 已提交
723

724 725
    if (status != MQ_CONSUMER_STATUS__READY) {
      terrno = TSDB_CODE_MND_CONSUMER_NOT_READY;
726
      goto _over;
727 728 729 730
    }

    pConsumerNew = tNewSMqConsumerObj(consumerId, cgroup);
    if (pConsumerNew == NULL) {
731
      goto _over;
732
    }
H
Haojun Liao 已提交
733

734
    // set the update type
735
    pConsumerNew->updateType = CONSUMER_UPDATE__REBALANCE;
H
Haojun Liao 已提交
736
    taosArrayDestroy(pConsumerNew->assignedTopics);
737
    pConsumerNew->assignedTopics = taosArrayDup(pTopicList, topicNameDup);
738

X
Xiaoyu Wang 已提交
739
    int32_t oldTopicNum = (pExistedConsumer->currentTopics) ? taosArrayGetSize(pExistedConsumer->currentTopics) : 0;
740 741 742 743

    int32_t i = 0, j = 0;
    while (i < oldTopicNum || j < newTopicNum) {
      if (i >= oldTopicNum) {
H
Haojun Liao 已提交
744
        char *newTopicCopy = taosStrdup(taosArrayGetP(pTopicList, j));
745 746 747 748
        taosArrayPush(pConsumerNew->rebNewTopics, &newTopicCopy);
        j++;
        continue;
      } else if (j >= newTopicNum) {
H
Haojun Liao 已提交
749
        char *oldTopicCopy = taosStrdup(taosArrayGetP(pExistedConsumer->currentTopics, i));
750 751 752 753
        taosArrayPush(pConsumerNew->rebRemovedTopics, &oldTopicCopy);
        i++;
        continue;
      } else {
H
Haojun Liao 已提交
754
        char *oldTopic = taosArrayGetP(pExistedConsumer->currentTopics, i);
755
        char *newTopic = taosArrayGetP(pTopicList, j);
756
        int   comp = strcmp(oldTopic, newTopic);
757 758 759 760 761
        if (comp == 0) {
          i++;
          j++;
          continue;
        } else if (comp < 0) {
762
          char *oldTopicCopy = taosStrdup(oldTopic);
763 764 765 766
          taosArrayPush(pConsumerNew->rebRemovedTopics, &oldTopicCopy);
          i++;
          continue;
        } else {
767
          char *newTopicCopy = taosStrdup(newTopic);
768 769 770 771 772 773 774
          taosArrayPush(pConsumerNew->rebNewTopics, &newTopicCopy);
          j++;
          continue;
        }
      }
    }

775 776
    // no topics need to be rebalanced
    if (taosArrayGetSize(pConsumerNew->rebNewTopics) == 0 && taosArrayGetSize(pConsumerNew->rebRemovedTopics) == 0) {
777
      goto _over;
778 779
    }

780 781
    if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) goto _over;
    if (mndTransPrepare(pMnode, pTrans) != 0) goto _over;
782 783
  }

S
Shengliang Guan 已提交
784
  code = TSDB_CODE_ACTION_IN_PROGRESS;
785

786
_over:
L
Liu Jicong 已提交
787 788
  mndTransDrop(pTrans);

H
Haojun Liao 已提交
789 790 791
  if (pExistedConsumer) {
    /*taosRUnLockLatch(&pExistedConsumer->lock);*/
    mndReleaseConsumer(pMnode, pExistedConsumer);
792
  }
H
Haojun Liao 已提交
793

794 795
  if (pConsumerNew) {
    tDeleteSMqConsumerObj(pConsumerNew);
L
Liu Jicong 已提交
796
    taosMemoryFree(pConsumerNew);
797
  }
798

799
  // TODO: replace with destroy subscribe msg
800
  taosArrayDestroyP(subscribe.topicNames, (FDelete)taosMemoryFree);
801
  return code;
L
Liu Jicong 已提交
802 803
}

L
Liu Jicong 已提交
804 805
SSdbRaw *mndConsumerActionEncode(SMqConsumerObj *pConsumer) {
  terrno = TSDB_CODE_OUT_OF_MEMORY;
S
Shengliang Guan 已提交
806 807

  void   *buf = NULL;
L
Liu Jicong 已提交
808
  int32_t tlen = tEncodeSMqConsumerObj(NULL, pConsumer);
L
Liu Jicong 已提交
809 810 811
  int32_t size = sizeof(int32_t) + tlen + MND_CONSUMER_RESERVE_SIZE;

  SSdbRaw *pRaw = sdbAllocRaw(SDB_CONSUMER, MND_CONSUMER_VER_NUMBER, size);
812
  if (pRaw == NULL) goto CM_ENCODE_OVER;
L
Liu Jicong 已提交
813

wafwerar's avatar
wafwerar 已提交
814
  buf = taosMemoryMalloc(tlen);
L
Liu Jicong 已提交
815 816
  if (buf == NULL) goto CM_ENCODE_OVER;

L
Liu Jicong 已提交
817
  void *abuf = buf;
L
Liu Jicong 已提交
818 819
  tEncodeSMqConsumerObj(&abuf, pConsumer);

L
Liu Jicong 已提交
820
  int32_t dataPos = 0;
L
Liu Jicong 已提交
821 822
  SDB_SET_INT32(pRaw, dataPos, tlen, CM_ENCODE_OVER);
  SDB_SET_BINARY(pRaw, dataPos, buf, tlen, CM_ENCODE_OVER);
L
Liu Jicong 已提交
823 824
  SDB_SET_RESERVE(pRaw, dataPos, MND_CONSUMER_RESERVE_SIZE, CM_ENCODE_OVER);
  SDB_SET_DATALEN(pRaw, dataPos, CM_ENCODE_OVER);
L
Liu Jicong 已提交
825

L
Liu Jicong 已提交
826 827
  terrno = TSDB_CODE_SUCCESS;

828
CM_ENCODE_OVER:
wafwerar's avatar
wafwerar 已提交
829
  taosMemoryFreeClear(buf);
830
  if (terrno != 0) {
831
    mError("consumer:0x%" PRIx64 " failed to encode to raw:%p since %s", pConsumer->consumerId, pRaw, terrstr());
832 833 834 835
    sdbFreeRaw(pRaw);
    return NULL;
  }

836
  mTrace("consumer:0x%" PRIx64 ", encode to raw:%p, row:%p", pConsumer->consumerId, pRaw, pConsumer);
L
Liu Jicong 已提交
837 838 839
  return pRaw;
}

L
Liu Jicong 已提交
840
SSdbRow *mndConsumerActionDecode(SSdbRaw *pRaw) {
841 842 843
  SSdbRow        *pRow = NULL;
  SMqConsumerObj *pConsumer = NULL;
  void           *buf = NULL;
844

H
Haojun Liao 已提交
845
  terrno = 0;
L
Liu Jicong 已提交
846
  int8_t sver = 0;
H
Haojun Liao 已提交
847 848 849
  if (sdbGetRawSoftVer(pRaw, &sver) != 0) {
    goto CM_DECODE_OVER;
  }
L
Liu Jicong 已提交
850

851
  if (sver < 1 || sver > MND_CONSUMER_VER_NUMBER) {
L
Liu Jicong 已提交
852
    terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
L
Liu Jicong 已提交
853
    goto CM_DECODE_OVER;
L
Liu Jicong 已提交
854 855
  }

856
  pRow = sdbAllocRow(sizeof(SMqConsumerObj));
H
Haojun Liao 已提交
857 858 859
  if (pRow == NULL) {
    goto CM_DECODE_OVER;
  }
860

861
  pConsumer = sdbGetRowObj(pRow);
H
Haojun Liao 已提交
862 863 864
  if (pConsumer == NULL) {
    goto CM_DECODE_OVER;
  }
L
Liu Jicong 已提交
865 866

  int32_t dataPos = 0;
L
Liu Jicong 已提交
867
  int32_t len;
L
Liu Jicong 已提交
868
  SDB_GET_INT32(pRaw, dataPos, &len, CM_DECODE_OVER);
wafwerar's avatar
wafwerar 已提交
869
  buf = taosMemoryMalloc(len);
H
Haojun Liao 已提交
870 871 872 873 874
  if (buf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto CM_DECODE_OVER;
  }

L
Liu Jicong 已提交
875 876
  SDB_GET_BINARY(pRaw, dataPos, buf, len, CM_DECODE_OVER);
  SDB_GET_RESERVE(pRaw, dataPos, MND_CONSUMER_RESERVE_SIZE, CM_DECODE_OVER);
L
Liu Jicong 已提交
877

878
  if (tDecodeSMqConsumerObj(buf, pConsumer, sver) == NULL) {
H
Haojun Liao 已提交
879
    terrno = TSDB_CODE_OUT_OF_MEMORY;  // TODO set correct error code
L
Liu Jicong 已提交
880 881
    goto CM_DECODE_OVER;
  }
L
Liu Jicong 已提交
882

H
Haojun Liao 已提交
883
  tmsgUpdateDnodeEpSet(&pConsumer->ep);
L
Liu Jicong 已提交
884

L
Liu Jicong 已提交
885
CM_DECODE_OVER:
wafwerar's avatar
wafwerar 已提交
886
  taosMemoryFreeClear(buf);
L
Liu Jicong 已提交
887
  if (terrno != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
888 889
    mError("consumer:0x%" PRIx64 " failed to decode from raw:%p since %s",
           pConsumer == NULL ? 0 : pConsumer->consumerId, pRaw, terrstr());
wafwerar's avatar
wafwerar 已提交
890
    taosMemoryFreeClear(pRow);
L
Liu Jicong 已提交
891
  }
L
Liu Jicong 已提交
892 893 894 895

  return pRow;
}

L
Liu Jicong 已提交
896
static int32_t mndConsumerActionInsert(SSdb *pSdb, SMqConsumerObj *pConsumer) {
H
Haojun Liao 已提交
897 898 899
  mDebug("consumer:0x%" PRIx64 " cgroup:%s status:%d(%s) epoch:%d load from sdb, perform insert action",
         pConsumer->consumerId, pConsumer->cgroup, pConsumer->status, mndConsumerStatusName(pConsumer->status),
         pConsumer->epoch);
L
Liu Jicong 已提交
900
  pConsumer->subscribeTime = pConsumer->upTime;
L
Liu Jicong 已提交
901 902 903
  return 0;
}

L
Liu Jicong 已提交
904
static int32_t mndConsumerActionDelete(SSdb *pSdb, SMqConsumerObj *pConsumer) {
X
Xiaoyu Wang 已提交
905 906
  mDebug("consumer:0x%" PRIx64 " perform delete action, status:(%d)%s", pConsumer->consumerId, pConsumer->status,
         mndConsumerStatusName(pConsumer->status));
907
  tDeleteSMqConsumerObj(pConsumer);
L
Liu Jicong 已提交
908 909 910
  return 0;
}

X
Xiaoyu Wang 已提交
911
static void updateConsumerStatus(SMqConsumerObj *pConsumer) {
912 913 914
  int32_t status = pConsumer->status;

  if (taosArrayGetSize(pConsumer->rebNewTopics) == 0 && taosArrayGetSize(pConsumer->rebRemovedTopics) == 0) {
915
    if (status == MQ_CONSUMER_STATUS_REBALANCE) {
916
      pConsumer->status = MQ_CONSUMER_STATUS__READY;
wmmhello's avatar
wmmhello 已提交
917
    } else if (status == MQ_CONSUMER_STATUS__LOST) {
H
Haojun Liao 已提交
918
      ASSERT(taosArrayGetSize(pConsumer->currentTopics) == 0);
919 920 921 922 923 924
      pConsumer->status = MQ_CONSUMER_STATUS__LOST_REBD;
    }
  }
}

// remove from new topic
X
Xiaoyu Wang 已提交
925
static void removeFromNewTopicList(SMqConsumerObj *pConsumer, const char *pTopic) {
926
  int32_t size = taosArrayGetSize(pConsumer->rebNewTopics);
927
  for (int32_t i = 0; i < size; i++) {
928 929 930 931 932 933
    char *p = taosArrayGetP(pConsumer->rebNewTopics, i);
    if (strcmp(pTopic, p) == 0) {
      taosArrayRemove(pConsumer->rebNewTopics, i);
      taosMemoryFree(p);

      mDebug("consumer:0x%" PRIx64 " remove new topic:%s in the topic list, remain newTopics:%d", pConsumer->consumerId,
X
Xiaoyu Wang 已提交
934
             pTopic, (int)taosArrayGetSize(pConsumer->rebNewTopics));
935 936 937 938 939 940
      break;
    }
  }
}

// remove from removed topic
X
Xiaoyu Wang 已提交
941
static void removeFromRemoveTopicList(SMqConsumerObj *pConsumer, const char *pTopic) {
942 943 944 945 946 947
  int32_t size = taosArrayGetSize(pConsumer->rebRemovedTopics);
  for (int32_t i = 0; i < size; i++) {
    char *p = taosArrayGetP(pConsumer->rebRemovedTopics, i);
    if (strcmp(pTopic, p) == 0) {
      taosArrayRemove(pConsumer->rebRemovedTopics, i);
      taosMemoryFree(p);
948 949 950

      mDebug("consumer:0x%" PRIx64 " remove topic:%s in the removed topic list, remain removedTopics:%d",
             pConsumer->consumerId, pTopic, (int)taosArrayGetSize(pConsumer->rebRemovedTopics));
951 952 953 954 955
      break;
    }
  }
}

956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978
static void removeFromCurrentTopicList(SMqConsumerObj *pConsumer, const char *pTopic) {
  int32_t sz = taosArrayGetSize(pConsumer->currentTopics);
  for (int32_t i = 0; i < sz; i++) {
    char *topic = taosArrayGetP(pConsumer->currentTopics, i);
    if (strcmp(pTopic, topic) == 0) {
      taosArrayRemove(pConsumer->currentTopics, i);
      taosMemoryFree(topic);

      mDebug("consumer:0x%" PRIx64 " remove topic:%s in the current topic list, remain currentTopics:%d",
             pConsumer->consumerId, pTopic, (int)taosArrayGetSize(pConsumer->currentTopics));
      break;
    }
  }
}

static bool existInCurrentTopicList(const SMqConsumerObj* pConsumer, const char* pTopic) {
  bool    existing = false;
  int32_t size = taosArrayGetSize(pConsumer->currentTopics);
  for (int32_t i = 0; i < size; i++) {
    char *topic = taosArrayGetP(pConsumer->currentTopics, i);

    if (strcmp(topic, pTopic) == 0) {
      existing = true;
979 980 981
      break;
    }
  }
982 983

  return existing;
984 985
}

L
Liu Jicong 已提交
986
static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, SMqConsumerObj *pNewConsumer) {
X
Xiaoyu Wang 已提交
987 988
  mDebug("consumer:0x%" PRIx64 " perform update action, update type:%d, subscribe-time:%" PRId64 ", uptime:%" PRId64,
         pOldConsumer->consumerId, pNewConsumer->updateType, pOldConsumer->subscribeTime, pOldConsumer->upTime);
L
Liu Jicong 已提交
989

990 991
  taosWLockLatch(&pOldConsumer->lock);

992 993 994 995
  if (pNewConsumer->updateType == CONSUMER_UPDATE__REBALANCE) {
    TSWAP(pOldConsumer->rebNewTopics, pNewConsumer->rebNewTopics);
    TSWAP(pOldConsumer->rebRemovedTopics, pNewConsumer->rebRemovedTopics);
    TSWAP(pOldConsumer->assignedTopics, pNewConsumer->assignedTopics);
996

997
    pOldConsumer->subscribeTime = pNewConsumer->upTime;
998
    pOldConsumer->status = MQ_CONSUMER_STATUS_REBALANCE;
999 1000 1001
  } else if (pNewConsumer->updateType == CONSUMER_UPDATE__LOST) {
    int32_t sz = taosArrayGetSize(pOldConsumer->currentTopics);
    for (int32_t i = 0; i < sz; i++) {
1002
      char *topic = taosStrdup(taosArrayGetP(pOldConsumer->currentTopics, i));
L
Liu Jicong 已提交
1003
      taosArrayPush(pOldConsumer->rebRemovedTopics, &topic);
1004
    }
L
Liu Jicong 已提交
1005 1006 1007

    pOldConsumer->rebalanceTime = pNewConsumer->upTime;

1008
    int32_t prevStatus = pOldConsumer->status;
1009
    pOldConsumer->status = MQ_CONSUMER_STATUS__LOST;
H
Haojun Liao 已提交
1010
    mDebug("consumer:0x%" PRIx64 " state %s -> %s, reb-time:%" PRId64 ", reb-removed-topics:%d",
1011
           pOldConsumer->consumerId, mndConsumerStatusName(prevStatus), mndConsumerStatusName(pOldConsumer->status),
H
Haojun Liao 已提交
1012
           pOldConsumer->rebalanceTime, (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics));
L
Liu Jicong 已提交
1013 1014 1015
  } else if (pNewConsumer->updateType == CONSUMER_UPDATE__RECOVER) {
    int32_t sz = taosArrayGetSize(pOldConsumer->assignedTopics);
    for (int32_t i = 0; i < sz; i++) {
1016
      char *topic = taosStrdup(taosArrayGetP(pOldConsumer->assignedTopics, i));
L
Liu Jicong 已提交
1017 1018 1019 1020
      taosArrayPush(pOldConsumer->rebNewTopics, &topic);
    }

    pOldConsumer->rebalanceTime = pNewConsumer->upTime;
1021
    pOldConsumer->status = MQ_CONSUMER_STATUS_REBALANCE;
1022 1023
  } else if (pNewConsumer->updateType == CONSUMER_UPDATE__TOUCH) {
    atomic_add_fetch_32(&pOldConsumer->epoch, 1);
L
Liu Jicong 已提交
1024 1025 1026

    pOldConsumer->rebalanceTime = pNewConsumer->upTime;

1027
  } else if (pNewConsumer->updateType == CONSUMER_UPDATE__ADD) {
1028
    char *pNewTopic = taosStrdup(taosArrayGetP(pNewConsumer->rebNewTopics, 0));
1029

1030
    // check if exist in current topic
1031
    removeFromNewTopicList(pOldConsumer, pNewTopic);
1032 1033

    // add to current topic
1034 1035 1036 1037
    bool existing = existInCurrentTopicList(pOldConsumer, pNewTopic);
    if (existing) {
      taosMemoryFree(pNewTopic);
    } else {  // added into current topic list
1038
      taosArrayPush(pOldConsumer->currentTopics, &pNewTopic);
L
Liu Jicong 已提交
1039 1040
      taosArraySort(pOldConsumer->currentTopics, taosArrayCompareString);
    }
H
Haojun Liao 已提交
1041

1042
    // set status
H
Haojun Liao 已提交
1043
    int32_t status = pOldConsumer->status;
1044
    updateConsumerStatus(pOldConsumer);
L
Liu Jicong 已提交
1045

H
Haojun Liao 已提交
1046
    // the re-balance is triggered when the new consumer is launched.
L
Liu Jicong 已提交
1047 1048
    pOldConsumer->rebalanceTime = pNewConsumer->upTime;

1049
    atomic_add_fetch_32(&pOldConsumer->epoch, 1);
1050 1051
    mDebug("consumer:0x%" PRIx64 " state (%d)%s -> (%d)%s, new epoch:%d, reb-time:%" PRId64
           ", current topics:%d, newTopics:%d, removeTopics:%d",
H
Haojun Liao 已提交
1052
           pOldConsumer->consumerId, status, mndConsumerStatusName(status), pOldConsumer->status,
1053 1054 1055 1056
           mndConsumerStatusName(pOldConsumer->status), pOldConsumer->epoch, pOldConsumer->rebalanceTime,
           (int)taosArrayGetSize(pOldConsumer->currentTopics), (int)taosArrayGetSize(pOldConsumer->rebNewTopics),
           (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics));

1057 1058 1059 1060
  } else if (pNewConsumer->updateType == CONSUMER_UPDATE__REMOVE) {
    char *removedTopic = taosArrayGetP(pNewConsumer->rebRemovedTopics, 0);

    // remove from removed topic
1061
    removeFromRemoveTopicList(pOldConsumer, removedTopic);
1062 1063

    // remove from current topic
1064
    removeFromCurrentTopicList(pOldConsumer, removedTopic);
1065 1066

    // set status
H
Haojun Liao 已提交
1067
    int32_t status = pOldConsumer->status;
1068
    updateConsumerStatus(pOldConsumer);
L
Liu Jicong 已提交
1069 1070

    pOldConsumer->rebalanceTime = pNewConsumer->upTime;
1071
    atomic_add_fetch_32(&pOldConsumer->epoch, 1);
H
Haojun Liao 已提交
1072

1073
    mDebug("consumer:0x%" PRIx64 " state (%d)%s -> (%d)%s, new epoch:%d, reb-time:%" PRId64
X
Xiaoyu Wang 已提交
1074
           ", current topics:%d, newTopics:%d, removeTopics:%d",
H
Haojun Liao 已提交
1075
           pOldConsumer->consumerId, status, mndConsumerStatusName(status), pOldConsumer->status,
1076 1077 1078
           mndConsumerStatusName(pOldConsumer->status), pOldConsumer->epoch, pOldConsumer->rebalanceTime,
           (int)taosArrayGetSize(pOldConsumer->currentTopics), (int)taosArrayGetSize(pOldConsumer->rebNewTopics),
           (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics));
1079 1080 1081
  }

  taosWUnLockLatch(&pOldConsumer->lock);
L
Liu Jicong 已提交
1082 1083 1084
  return 0;
}

L
Liu Jicong 已提交
1085
SMqConsumerObj *mndAcquireConsumer(SMnode *pMnode, int64_t consumerId) {
L
Liu Jicong 已提交
1086 1087
  SSdb           *pSdb = pMnode->pSdb;
  SMqConsumerObj *pConsumer = sdbAcquire(pSdb, SDB_CONSUMER, &consumerId);
L
Liu Jicong 已提交
1088
  if (pConsumer == NULL) {
1089
    terrno = TSDB_CODE_MND_CONSUMER_NOT_EXIST;
L
Liu Jicong 已提交
1090 1091 1092 1093
  }
  return pConsumer;
}

L
Liu Jicong 已提交
1094
void mndReleaseConsumer(SMnode *pMnode, SMqConsumerObj *pConsumer) {
L
Liu Jicong 已提交
1095 1096 1097
  SSdb *pSdb = pMnode->pSdb;
  sdbRelease(pSdb, pConsumer);
}
L
Liu Jicong 已提交
1098

S
Shengliang Guan 已提交
1099 1100
static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rowsCapacity) {
  SMnode         *pMnode = pReq->info.node;
L
Liu Jicong 已提交
1101 1102 1103 1104 1105 1106
  SSdb           *pSdb = pMnode->pSdb;
  int32_t         numOfRows = 0;
  SMqConsumerObj *pConsumer = NULL;

  while (numOfRows < rowsCapacity) {
    pShow->pIter = sdbFetch(pSdb, SDB_CONSUMER, pShow->pIter, (void **)&pConsumer);
1107 1108 1109 1110
    if (pShow->pIter == NULL) {
      break;
    }

L
Liu Jicong 已提交
1111
    if (taosArrayGetSize(pConsumer->assignedTopics) == 0) {
X
Xiaoyu Wang 已提交
1112
      mDebug("showing consumer:0x%" PRIx64 " no assigned topic, skip", pConsumer->consumerId);
L
Liu Jicong 已提交
1113 1114 1115
      sdbRelease(pSdb, pConsumer);
      continue;
    }
L
Liu Jicong 已提交
1116 1117

    taosRLockLatch(&pConsumer->lock);
X
Xiaoyu Wang 已提交
1118
    mDebug("showing consumer:0x%" PRIx64, pConsumer->consumerId);
L
Liu Jicong 已提交
1119

L
Liu Jicong 已提交
1120 1121 1122 1123 1124 1125 1126
    int32_t topicSz = taosArrayGetSize(pConsumer->assignedTopics);
    bool    hasTopic = true;
    if (topicSz == 0) {
      hasTopic = false;
      topicSz = 1;
    }

L
Liu Jicong 已提交
1127 1128 1129 1130
    if (numOfRows + topicSz > rowsCapacity) {
      blockDataEnsureCapacity(pBlock, numOfRows + topicSz);
    }

L
Liu Jicong 已提交
1131 1132 1133 1134 1135 1136
    for (int32_t i = 0; i < topicSz; i++) {
      SColumnInfoData *pColInfo;
      int32_t          cols = 0;

      // consumer id
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
1137
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->consumerId, false);
L
Liu Jicong 已提交
1138

L
Liu Jicong 已提交
1139 1140
      // consumer group
      char cgroup[TSDB_CGROUP_LEN + VARSTR_HEADER_SIZE] = {0};
1141 1142
      STR_TO_VARSTR(cgroup, pConsumer->cgroup);

L
Liu Jicong 已提交
1143
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
1144
      colDataSetVal(pColInfo, numOfRows, (const char *)cgroup, false);
L
Liu Jicong 已提交
1145

1146
      // client id
L
Liu Jicong 已提交
1147
      char clientId[256 + VARSTR_HEADER_SIZE] = {0};
1148 1149
      STR_TO_VARSTR(clientId, pConsumer->clientId);

L
Liu Jicong 已提交
1150
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
1151
      colDataSetVal(pColInfo, numOfRows, (const char *)clientId, false);
L
Liu Jicong 已提交
1152 1153

      // status
X
Xiaoyu Wang 已提交
1154 1155
      char        status[20 + VARSTR_HEADER_SIZE] = {0};
      const char *pStatusName = mndConsumerStatusName(pConsumer->status);
1156 1157
      STR_TO_VARSTR(status, pStatusName);

L
Liu Jicong 已提交
1158
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
1159
      colDataSetVal(pColInfo, numOfRows, (const char *)status, false);
L
Liu Jicong 已提交
1160 1161 1162 1163 1164 1165

      // one subscribed topic
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      if (hasTopic) {
        char        topic[TSDB_TOPIC_FNAME_LEN + VARSTR_HEADER_SIZE] = {0};
        const char *topicName = mndTopicGetShowName(taosArrayGetP(pConsumer->assignedTopics, i));
H
Haojun Liao 已提交
1166
        STR_TO_VARSTR(topic, topicName);
1167
        colDataSetVal(pColInfo, numOfRows, (const char *)topic, false);
L
Liu Jicong 已提交
1168
      } else {
1169
        colDataSetVal(pColInfo, numOfRows, NULL, true);
L
Liu Jicong 已提交
1170 1171 1172
      }

      // end point
L
Liu Jicong 已提交
1173
      /*pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);*/
1174
      /*colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->ep, true);*/
L
Liu Jicong 已提交
1175 1176 1177

      // up time
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
1178
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->upTime, false);
L
Liu Jicong 已提交
1179 1180 1181

      // subscribe time
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
1182
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->subscribeTime, false);
L
Liu Jicong 已提交
1183 1184 1185

      // rebalance time
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
1186
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->rebalanceTime, pConsumer->rebalanceTime == 0);
L
Liu Jicong 已提交
1187

1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->withTbName, false);

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->useSnapshot, false);

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->autoCommit, false);

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->autoCommitInterval, false);

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumer->resetOffsetCfg, false);

L
Liu Jicong 已提交
1203 1204
      numOfRows++;
    }
1205

L
Liu Jicong 已提交
1206 1207
    taosRUnLockLatch(&pConsumer->lock);
    sdbRelease(pSdb, pConsumer);
1208 1209

    pBlock->info.rows = numOfRows;
L
Liu Jicong 已提交
1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227
  }

  pShow->numOfRows += numOfRows;
  return numOfRows;
}

static void mndCancelGetNextConsumer(SMnode *pMnode, void *pIter) {
  SSdb *pSdb = pMnode->pSdb;
  sdbCancelFetch(pSdb, pIter);
}

static const char *mndConsumerStatusName(int status) {
  switch (status) {
    case MQ_CONSUMER_STATUS__READY:
      return "ready";
    case MQ_CONSUMER_STATUS__LOST:
    case MQ_CONSUMER_STATUS__LOST_REBD:
      return "lost";
1228
    case MQ_CONSUMER_STATUS_REBALANCE:
L
Liu Jicong 已提交
1229 1230 1231 1232 1233
      return "rebalancing";
    default:
      return "unknown";
  }
}