/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

S
Shengliang Guan 已提交
16
#define _DEFAULT_SOURCE
S
Shengliang Guan 已提交
17
#include "mndVgroup.h"
S
Shengliang Guan 已提交
18
#include "mndDb.h"
S
Shengliang Guan 已提交
19
#include "mndDnode.h"
S
Shengliang Guan 已提交
20
#include "mndMnode.h"
21
#include "mndPrivilege.h"
S
Shengliang Guan 已提交
22 23
#include "mndShow.h"
#include "mndTrans.h"
S
Shengliang Guan 已提交
24
#include "mndUser.h"
H
Haojun Liao 已提交
25
#include "tmisce.h"
S
Shengliang Guan 已提交
26

S
Shengliang Guan 已提交
27 28
#define VGROUP_VER_NUMBER   1
#define VGROUP_RESERVE_SIZE 64
S
Shengliang Guan 已提交
29

S
Shengliang Guan 已提交
30 31 32
static SSdbRow *mndVgroupActionDecode(SSdbRaw *pRaw);
static int32_t  mndVgroupActionInsert(SSdb *pSdb, SVgObj *pVgroup);
static int32_t  mndVgroupActionDelete(SSdb *pSdb, SVgObj *pVgroup);
S
Shengliang Guan 已提交
33
static int32_t  mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew);
S
Shengliang Guan 已提交
34

S
Shengliang Guan 已提交
35
static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows);
S
Shengliang Guan 已提交
36
static void    mndCancelGetNextVgroup(SMnode *pMnode, void *pIter);
S
Shengliang Guan 已提交
37
static int32_t mndRetrieveVnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows);
S
Shengliang Guan 已提交
38 39
static void    mndCancelGetNextVnode(SMnode *pMnode, void *pIter);

S
Shengliang Guan 已提交
40 41
static int32_t mndProcessRedistributeVgroupMsg(SRpcMsg *pReq);
static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq);
S
Shengliang Guan 已提交
42
static int32_t mndProcessBalanceVgroupMsg(SRpcMsg *pReq);
C
cadem 已提交
43
static int32_t mndProcessVgroupBalanceLeaderMsg(SRpcMsg *pReq);
S
Shengliang Guan 已提交
44

S
Shengliang Guan 已提交
45
/*
 * Registers the vgroup sdb table and installs every message handler and
 * show-table handler owned by this module.
 *
 * Returns the value produced by sdbSetTable() for the vgroup table.
 */
int32_t mndInitVgroup(SMnode *pMnode) {
  SSdbTable table = {
      .sdbType = SDB_VGROUP,
      .keyType = SDB_KEY_INT32,
      .encodeFp = (SdbEncodeFp)mndVgroupActionEncode,
      .decodeFp = (SdbDecodeFp)mndVgroupActionDecode,
      .insertFp = (SdbInsertFp)mndVgroupActionInsert,
      .updateFp = (SdbUpdateFp)mndVgroupActionUpdate,
      .deleteFp = (SdbDeleteFp)mndVgroupActionDelete,
  };

  // Every response message below is handed to mndTransProcessRsp.
  mndSetMsgHandle(pMnode, TDMT_DND_CREATE_VNODE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_ALTER_REPLICA_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_ALTER_CONFIG_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_ALTER_CONFIRM_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_ALTER_HASHRANGE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_DND_DROP_VNODE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_DISABLE_WRITE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_SYNC_FORCE_FOLLOWER_RSP, mndTransProcessRsp);

  // Requests handled by this module.
  mndSetMsgHandle(pMnode, TDMT_MND_REDISTRIBUTE_VGROUP, mndProcessRedistributeVgroupMsg);
  mndSetMsgHandle(pMnode, TDMT_MND_SPLIT_VGROUP, mndProcessSplitVgroupMsg);
  mndSetMsgHandle(pMnode, TDMT_MND_BALANCE_VGROUP, mndProcessBalanceVgroupMsg);
  mndSetMsgHandle(pMnode, TDMT_MND_BALANCE_VGROUP_LEADER, mndProcessVgroupBalanceLeaderMsg);

  // Show-table support for the vgroup and vnode listings.
  mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VGROUP, mndRetrieveVgroups);
  mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_VGROUP, mndCancelGetNextVgroup);
  mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VNODES, mndRetrieveVnodes);
  mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_VNODES, mndCancelGetNextVnode);

  return sdbSetTable(pMnode->pSdb, table);
}

// Counterpart of mndInitVgroup(); intentionally performs no work.
void mndCleanupVgroup(SMnode *pMnode) {
  (void)pMnode;
}

S
Shengliang Guan 已提交
82
/*
 * Serializes a vgroup object into a newly allocated sdb raw record.
 *
 * Field order: vgId, createdTime, updateTime, version, hashBegin, hashEnd,
 * dbName, dbUid, isTsma, replica, one dnodeId per replica, then
 * VGROUP_RESERVE_SIZE reserved bytes.
 *
 * Returns the raw record, or NULL with terrno left non-zero on failure.
 */
SSdbRaw *mndVgroupActionEncode(SVgObj *pVgroup) {
  // terrno stays at this value on every early exit to _OVER; it is only
  // cleared once the whole record has been written.
  terrno = TSDB_CODE_OUT_OF_MEMORY;

  SSdbRaw *pRaw = sdbAllocRaw(SDB_VGROUP, VGROUP_VER_NUMBER, sizeof(SVgObj) + VGROUP_RESERVE_SIZE);
  if (pRaw == NULL) goto _OVER;

  int32_t dataPos = 0;
  SDB_SET_INT32(pRaw, dataPos, pVgroup->vgId, _OVER)
  SDB_SET_INT64(pRaw, dataPos, pVgroup->createdTime, _OVER)
  SDB_SET_INT64(pRaw, dataPos, pVgroup->updateTime, _OVER)
  SDB_SET_INT32(pRaw, dataPos, pVgroup->version, _OVER)
  SDB_SET_INT32(pRaw, dataPos, pVgroup->hashBegin, _OVER)
  SDB_SET_INT32(pRaw, dataPos, pVgroup->hashEnd, _OVER)
  SDB_SET_BINARY(pRaw, dataPos, pVgroup->dbName, TSDB_DB_FNAME_LEN, _OVER)
  SDB_SET_INT64(pRaw, dataPos, pVgroup->dbUid, _OVER)
  SDB_SET_INT8(pRaw, dataPos, pVgroup->isTsma, _OVER)
  SDB_SET_INT8(pRaw, dataPos, pVgroup->replica, _OVER)
  for (int8_t i = 0; i < pVgroup->replica; ++i) {
    SVnodeGid *pVgid = &pVgroup->vnodeGid[i];
    SDB_SET_INT32(pRaw, dataPos, pVgid->dnodeId, _OVER)
  }
  SDB_SET_RESERVE(pRaw, dataPos, VGROUP_RESERVE_SIZE, _OVER)
  SDB_SET_DATALEN(pRaw, dataPos, _OVER)

  terrno = 0;

_OVER:
  if (terrno != 0) {
    mError("vgId:%d, failed to encode to raw:%p since %s", pVgroup->vgId, pRaw, terrstr());
    sdbFreeRaw(pRaw);
    return NULL;
  }

  mTrace("vgId:%d, encode to raw:%p, row:%p", pVgroup->vgId, pRaw, pVgroup);
  return pRaw;
}

S
Shengliang Guan 已提交
119
/*
 * Rebuilds a vgroup row from an sdb raw record written by
 * mndVgroupActionEncode().
 *
 * Only records whose soft version equals VGROUP_VER_NUMBER are accepted;
 * any other version fails with TSDB_CODE_SDB_INVALID_DATA_VER.
 *
 * Returns the decoded row, or NULL with terrno left non-zero on failure.
 */
SSdbRow *mndVgroupActionDecode(SSdbRaw *pRaw) {
  // terrno stays non-zero on every early exit to _OVER.
  terrno = TSDB_CODE_OUT_OF_MEMORY;
  SSdbRow *pRow = NULL;
  SVgObj  *pVgroup = NULL;

  int8_t sver = 0;
  if (sdbGetRawSoftVer(pRaw, &sver) != 0) goto _OVER;

  if (sver != VGROUP_VER_NUMBER) {
    terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
    goto _OVER;
  }

  pRow = sdbAllocRow(sizeof(SVgObj));
  if (pRow == NULL) goto _OVER;

  pVgroup = sdbGetRowObj(pRow);
  if (pVgroup == NULL) goto _OVER;

  int32_t dataPos = 0;
  SDB_GET_INT32(pRaw, dataPos, &pVgroup->vgId, _OVER)
  SDB_GET_INT64(pRaw, dataPos, &pVgroup->createdTime, _OVER)
  SDB_GET_INT64(pRaw, dataPos, &pVgroup->updateTime, _OVER)
  SDB_GET_INT32(pRaw, dataPos, &pVgroup->version, _OVER)
  SDB_GET_INT32(pRaw, dataPos, &pVgroup->hashBegin, _OVER)
  SDB_GET_INT32(pRaw, dataPos, &pVgroup->hashEnd, _OVER)
  SDB_GET_BINARY(pRaw, dataPos, pVgroup->dbName, TSDB_DB_FNAME_LEN, _OVER)
  SDB_GET_INT64(pRaw, dataPos, &pVgroup->dbUid, _OVER)
  SDB_GET_INT8(pRaw, dataPos, &pVgroup->isTsma, _OVER)
  SDB_GET_INT8(pRaw, dataPos, &pVgroup->replica, _OVER)
  // NOTE(review): replica comes straight from the record and is not checked
  // against the capacity of vnodeGid[] - confirm the record is trusted.
  for (int8_t i = 0; i < pVgroup->replica; ++i) {
    SVnodeGid *pVgid = &pVgroup->vnodeGid[i];
    SDB_GET_INT32(pRaw, dataPos, &pVgid->dnodeId, _OVER)
    // A single-replica group is marked as leader right away.
    if (pVgroup->replica == 1) {
      pVgid->syncState = TAOS_SYNC_STATE_LEADER;
    }
  }
  SDB_GET_RESERVE(pRaw, dataPos, VGROUP_RESERVE_SIZE, _OVER)

  terrno = 0;

_OVER:
  if (terrno != 0) {
    mError("vgId:%d, failed to decode from raw:%p since %s", pVgroup == NULL ? 0 : pVgroup->vgId, pRaw, terrstr());
    taosMemoryFreeClear(pRow);
    return NULL;
  }

  mTrace("vgId:%d, decode from raw:%p, row:%p", pVgroup->vgId, pRaw, pVgroup);
  return pRow;
}

// sdb insert callback for vgroup rows: only traces the event. Always returns 0.
static int32_t mndVgroupActionInsert(SSdb *pSdb, SVgObj *pVgroup) {
  mTrace("vgId:%d, perform insert action, row:%p", pVgroup->vgId, pVgroup);
  return 0;
}

// sdb delete callback for vgroup rows: only traces the event. Always returns 0.
static int32_t mndVgroupActionDelete(SSdb *pSdb, SVgObj *pVgroup) {
  mTrace("vgId:%d, perform delete action, row:%p", pVgroup->vgId, pVgroup);
  return 0;
}

S
Shengliang Guan 已提交
181 182 183 184 185 186 187
/*
 * sdb update callback: folds the persisted fields of pNew into the live row
 * pOld.
 *
 * Sync state (syncState / syncRestore / syncCanRead) is carried over for every
 * member whose dnodeId appears in both rows. The statistics counters of pOld
 * are copied into pNew, and finally the whole vnodeGid array of pNew is copied
 * back into pOld.
 *
 * Always returns 0.
 */
static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew) {
  mTrace("vgId:%d, perform update action, old row:%p new row:%p", pOld->vgId, pOld, pNew);

  // Capture the old member count before pOld->replica is overwritten, so the
  // matching loop below walks exactly the entries the old row really had.
  const int32_t oldReplica = pOld->replica;

  pOld->updateTime = pNew->updateTime;
  pOld->version = pNew->version;
  pOld->hashBegin = pNew->hashBegin;
  pOld->hashEnd = pNew->hashEnd;
  pOld->replica = pNew->replica;
  pOld->isTsma = pNew->isTsma;

  for (int32_t i = 0; i < pNew->replica; ++i) {
    SVnodeGid *pNewGid = &pNew->vnodeGid[i];
    for (int32_t j = 0; j < oldReplica; ++j) {
      SVnodeGid *pOldGid = &pOld->vnodeGid[j];
      if (pNewGid->dnodeId == pOldGid->dnodeId) {
        pNewGid->syncState = pOldGid->syncState;
        pNewGid->syncRestore = pOldGid->syncRestore;
        pNewGid->syncCanRead = pOldGid->syncCanRead;
      }
    }
  }

  pNew->numOfTables = pOld->numOfTables;
  pNew->numOfTimeSeries = pOld->numOfTimeSeries;
  pNew->totalStorage = pOld->totalStorage;
  pNew->compStorage = pOld->compStorage;
  pNew->pointsWritten = pOld->pointsWritten;
  pNew->compact = pOld->compact;

  memcpy(pOld->vnodeGid, pNew->vnodeGid, TSDB_MAX_REPLICA * sizeof(SVnodeGid));
  return 0;
}

/*
 * Acquires the vgroup row with the given id from the sdb.
 *
 * Returns the row, or NULL on failure. When the sdb reports
 * TSDB_CODE_SDB_OBJ_NOT_THERE, terrno is rewritten to
 * TSDB_CODE_MND_VGROUP_NOT_EXIST.
 */
SVgObj *mndAcquireVgroup(SMnode *pMnode, int32_t vgId) {
  SSdb   *pSdb = pMnode->pSdb;
  SVgObj *pVgroup = sdbAcquire(pSdb, SDB_VGROUP, &vgId);
  if (pVgroup == NULL && terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) {
    terrno = TSDB_CODE_MND_VGROUP_NOT_EXIST;
  }
  return pVgroup;
}

// Hands a vgroup row back to the sdb via sdbRelease().
void mndReleaseVgroup(SMnode *pMnode, SVgObj *pVgroup) {
  sdbRelease(pMnode->pSdb, pVgroup);
}

S
Shengliang Guan 已提交
224
/*
 * Builds a serialized SCreateVnodeReq for one member of a vgroup.
 *
 * pDnode   - dnode the request is built for; it must be one of the vgroup's
 *            members, otherwise the call fails with TSDB_CODE_APP_ERROR.
 * pDb      - database whose configuration is copied into the request.
 * pVgroup  - vgroup describing id, version, hash range and members.
 * pContLen - receives the length of the returned buffer on success.
 *
 * Returns a buffer obtained from taosMemoryMalloc(), or NULL on failure.
 */
void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen) {
  SCreateVnodeReq createReq = {0};
  createReq.vgId = pVgroup->vgId;
  memcpy(createReq.db, pDb->name, TSDB_DB_FNAME_LEN);
  createReq.dbUid = pDb->uid;
  createReq.vgVersion = pVgroup->version;
  createReq.numOfStables = pDb->cfg.numOfStables;
  createReq.buffer = pDb->cfg.buffer;
  createReq.pageSize = pDb->cfg.pageSize;
  createReq.pages = pDb->cfg.pages;
  createReq.cacheLastSize = pDb->cfg.cacheLastSize;
  createReq.daysPerFile = pDb->cfg.daysPerFile;
  createReq.daysToKeep0 = pDb->cfg.daysToKeep0;
  createReq.daysToKeep1 = pDb->cfg.daysToKeep1;
  createReq.daysToKeep2 = pDb->cfg.daysToKeep2;
  createReq.minRows = pDb->cfg.minRows;
  createReq.maxRows = pDb->cfg.maxRows;
  createReq.walFsyncPeriod = pDb->cfg.walFsyncPeriod;
  createReq.walLevel = pDb->cfg.walLevel;
  createReq.precision = pDb->cfg.precision;
  createReq.compression = pDb->cfg.compression;
  createReq.strict = pDb->cfg.strict;
  createReq.cacheLast = pDb->cfg.cacheLast;
  createReq.replica = pVgroup->replica;
  createReq.selfIndex = -1;  // resolved in the member loop below
  createReq.hashBegin = pVgroup->hashBegin;
  createReq.hashEnd = pVgroup->hashEnd;
  createReq.hashMethod = pDb->cfg.hashMethod;
  createReq.numOfRetensions = pDb->cfg.numOfRetensions;
  createReq.pRetensions = pDb->cfg.pRetensions;
  createReq.isTsma = pVgroup->isTsma;
  createReq.pTsma = pVgroup->pTsma;
  createReq.walRetentionPeriod = pDb->cfg.walRetentionPeriod;
  createReq.walRetentionSize = pDb->cfg.walRetentionSize;
  createReq.walRollPeriod = pDb->cfg.walRollPeriod;
  createReq.walSegmentSize = pDb->cfg.walSegmentSize;
  createReq.sstTrigger = pDb->cfg.sstTrigger;
  createReq.hashPrefix = pDb->cfg.hashPrefix;
  createReq.hashSuffix = pDb->cfg.hashSuffix;
  createReq.tsdbPageSize = pDb->cfg.tsdbPageSize;

  // Fill in the endpoint of every member and find the target dnode's index.
  for (int32_t v = 0; v < pVgroup->replica; ++v) {
    SReplica  *pReplica = &createReq.replicas[v];
    SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
    SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
    if (pVgidDnode == NULL) {
      return NULL;
    }

    pReplica->id = pVgidDnode->id;
    pReplica->port = pVgidDnode->port;
    memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
    mndReleaseDnode(pMnode, pVgidDnode);

    if (pDnode->id == pVgid->dnodeId) {
      createReq.selfIndex = v;
    }
  }

  if (createReq.selfIndex == -1) {
    terrno = TSDB_CODE_APP_ERROR;
    return NULL;
  }

  mInfo("vgId:%d, build create vnode req, replica:%d selfIndex:%d strict:%d", createReq.vgId, createReq.replica,
        createReq.selfIndex, createReq.strict);
  for (int32_t i = 0; i < createReq.replica; ++i) {
    mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.replicas[i].fqdn, createReq.replicas[i].port);
  }

  // First pass computes the encoded size, second pass writes into the buffer.
  int32_t contLen = tSerializeSCreateVnodeReq(NULL, 0, &createReq);
  if (contLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // Do not hand out a buffer the serializer failed to fill.
  if (tSerializeSCreateVnodeReq(pReq, contLen, &createReq) < 0) {
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}

S
Shengliang Guan 已提交
311 312
/*
 * Builds an alter-vnode-config message: an SMsgHead followed by the
 * serialized SAlterVnodeConfigReq taken from the database configuration.
 *
 * pContLen receives the total length (head + body) on success.
 * Returns a buffer obtained from taosMemoryMalloc(), or NULL on failure.
 */
static void *mndBuildAlterVnodeConfigReq(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen) {
  SAlterVnodeConfigReq alterReq = {0};
  alterReq.vgVersion = pVgroup->version;
  alterReq.buffer = pDb->cfg.buffer;
  alterReq.pageSize = pDb->cfg.pageSize;
  alterReq.pages = pDb->cfg.pages;
  alterReq.cacheLastSize = pDb->cfg.cacheLastSize;
  alterReq.daysPerFile = pDb->cfg.daysPerFile;
  alterReq.daysToKeep0 = pDb->cfg.daysToKeep0;
  alterReq.daysToKeep1 = pDb->cfg.daysToKeep1;
  alterReq.daysToKeep2 = pDb->cfg.daysToKeep2;
  alterReq.walFsyncPeriod = pDb->cfg.walFsyncPeriod;
  alterReq.walLevel = pDb->cfg.walLevel;
  alterReq.strict = pDb->cfg.strict;
  alterReq.cacheLast = pDb->cfg.cacheLast;
  alterReq.sttTrigger = pDb->cfg.sstTrigger;
  alterReq.minRows = pDb->cfg.minRows;
  alterReq.walRetentionPeriod = pDb->cfg.walRetentionPeriod;
  alterReq.walRetentionSize = pDb->cfg.walRetentionSize;

  mInfo("vgId:%d, build alter vnode config req", pVgroup->vgId);
  int32_t bodyLen = tSerializeSAlterVnodeConfigReq(NULL, 0, &alterReq);
  if (bodyLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }
  int32_t contLen = bodyLen + (int32_t)sizeof(SMsgHead);

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // The head carries the total length and the vgId, both in network order.
  SMsgHead *pHead = pReq;
  pHead->contLen = htonl(contLen);
  pHead->vgId = htonl(pVgroup->vgId);

  // Only bodyLen bytes remain after the head; passing the full contLen would
  // overstate the space available to the serializer.
  if (tSerializeSAlterVnodeConfigReq((char *)pReq + sizeof(SMsgHead), bodyLen, &alterReq) < 0) {
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}

S
Shengliang Guan 已提交
354
/*
 * Builds a serialized SAlterVnodeReplicaReq addressed to the member living on
 * dnodeId.
 *
 * dnodeId must belong to the vgroup, otherwise the call fails with
 * TSDB_CODE_APP_ERROR. pContLen receives the buffer length on success.
 * Returns a buffer obtained from taosMemoryMalloc(), or NULL on failure.
 */
static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId,
                                          int32_t *pContLen) {
  SAlterVnodeReplicaReq alterReq = {
      .vgId = pVgroup->vgId,
      .strict = pDb->cfg.strict,
      .replica = pVgroup->replica,
      .selfIndex = -1,  // resolved in the member loop below
  };

  for (int32_t v = 0; v < pVgroup->replica; ++v) {
    SReplica  *pReplica = &alterReq.replicas[v];
    SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
    SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
    if (pVgidDnode == NULL) return NULL;

    pReplica->id = pVgidDnode->id;
    pReplica->port = pVgidDnode->port;
    memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
    mndReleaseDnode(pMnode, pVgidDnode);

    if (dnodeId == pVgid->dnodeId) {
      alterReq.selfIndex = v;
    }
  }

  mInfo("vgId:%d, build alter vnode req, replica:%d selfIndex:%d strict:%d", alterReq.vgId, alterReq.replica,
        alterReq.selfIndex, alterReq.strict);
  for (int32_t i = 0; i < alterReq.replica; ++i) {
    mInfo("vgId:%d, replica:%d ep:%s:%u", alterReq.vgId, i, alterReq.replicas[i].fqdn, alterReq.replicas[i].port);
  }

  if (alterReq.selfIndex == -1) {
    terrno = TSDB_CODE_APP_ERROR;
    return NULL;
  }

  // First pass computes the encoded size, second pass writes into the buffer.
  int32_t contLen = tSerializeSAlterVnodeReplicaReq(NULL, 0, &alterReq);
  if (contLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // Do not hand out a buffer the serializer failed to fill.
  if (tSerializeSAlterVnodeReplicaReq(pReq, contLen, &alterReq) < 0) {
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}

407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
/*
 * Builds a serialized SDisableVnodeWriteReq with disable set to 1 for the
 * given vgroup id.
 *
 * pContLen receives the buffer length on success.
 * Returns a buffer obtained from taosMemoryMalloc(), or NULL on failure.
 */
static void *mndBuildDisableVnodeWriteReq(SMnode *pMnode, SDbObj *pDb, int32_t vgId, int32_t *pContLen) {
  SDisableVnodeWriteReq disableReq = {
      .vgId = vgId,
      .disable = 1,
  };

  mInfo("vgId:%d, build disable vnode write req", vgId);
  // First pass computes the encoded size, second pass writes into the buffer.
  int32_t contLen = tSerializeSDisableVnodeWriteReq(NULL, 0, &disableReq);
  if (contLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // Do not hand out a buffer the serializer failed to fill.
  if (tSerializeSDisableVnodeWriteReq(pReq, contLen, &disableReq) < 0) {
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}

431 432 433 434 435 436 437 438
/*
 * Builds a serialized SAlterVnodeHashRangeReq carrying the vgroup's id as the
 * source, dstVgId as the destination, and the vgroup's current hash range.
 *
 * pContLen receives the buffer length on success.
 * Returns a buffer obtained from taosMemoryMalloc(), or NULL on failure.
 */
static void *mndBuildAlterVnodeHashRangeReq(SMnode *pMnode, SVgObj *pVgroup, int32_t dstVgId, int32_t *pContLen) {
  SAlterVnodeHashRangeReq alterReq = {
      .srcVgId = pVgroup->vgId,
      .dstVgId = dstVgId,
      .hashBegin = pVgroup->hashBegin,
      .hashEnd = pVgroup->hashEnd,
  };

  mInfo("vgId:%d, build alter vnode hashrange req, dstVgId:%d, hashrange:[%u, %u]", pVgroup->vgId, dstVgId,
        pVgroup->hashBegin, pVgroup->hashEnd);
  // First pass computes the encoded size, second pass writes into the buffer.
  int32_t contLen = tSerializeSAlterVnodeHashRangeReq(NULL, 0, &alterReq);
  if (contLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // Do not hand out a buffer the serializer failed to fill.
  if (tSerializeSAlterVnodeHashRangeReq(pReq, contLen, &alterReq) < 0) {
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}

L
Liu Jicong 已提交
458
/*
 * Builds a serialized SDropVnodeReq for the vgroup's vnode on pDnode.
 *
 * pContLen receives the buffer length on success.
 * Returns a buffer obtained from taosMemoryMalloc(), or NULL on failure.
 */
void *mndBuildDropVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen) {
  SDropVnodeReq dropReq = {0};
  dropReq.dnodeId = pDnode->id;
  dropReq.vgId = pVgroup->vgId;
  memcpy(dropReq.db, pDb->name, TSDB_DB_FNAME_LEN);
  dropReq.dbUid = pDb->uid;

  mInfo("vgId:%d, build drop vnode req", dropReq.vgId);
  // First pass computes the encoded size, second pass writes into the buffer.
  int32_t contLen = tSerializeSDropVnodeReq(NULL, 0, &dropReq);
  if (contLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // Do not hand out a buffer the serializer failed to fill.
  if (tSerializeSDropVnodeReq(pReq, contLen, &dropReq) < 0) {
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}

S
Shengliang Guan 已提交
483 484 485
// sdbTraverse callback: zeroes the vnode and other-node counters of a dnode.
// Always returns true.
static bool mndResetDnodesArrayFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) {
  SDnodeObj *pDnode = pObj;
  pDnode->numOfVnodes = 0;
  pDnode->numOfOtherNodes = 0;
  return true;
}

/*
 * sdbTraverse callback used by mndBuildDnodesArray().
 *
 * pObj - the dnode being visited.
 * p1   - SArray that collects the candidate dnodes.
 * p2   - pointer to the int32_t id of a dnode that must be left out.
 *
 * Refreshes the dnode's vnode count and used memory, counts an mnode on the
 * dnode as one "other node", and pushes the dnode into the array when it is
 * online and supports at least one vnode. Always returns true.
 */
static bool mndBuildDnodesArrayFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) {
  SDnodeObj *pDnode = pObj;
  SArray    *pArray = p1;
  int32_t    exceptDnodeId = *(int32_t *)p2;

  if (exceptDnodeId == pDnode->id) {
    return true;
  }

  int64_t curMs = taosGetTimestampMs();
  bool    online = mndIsDnodeOnline(pDnode, curMs);
  bool    isMnode = mndIsMnode(pMnode, pDnode->id);
  pDnode->numOfVnodes = mndGetVnodesNum(pMnode, pDnode->id);
  pDnode->memUsed = mndGetVnodesMemory(pMnode, pDnode->id);

  mInfo("dnode:%d, vnodes:%d supportVnodes:%d isMnode:%d online:%d memory avail:%" PRId64 " used:%" PRId64, pDnode->id,
        pDnode->numOfVnodes, pDnode->numOfSupportVnodes, isMnode, online, pDnode->memAvail, pDnode->memUsed);

  if (isMnode) {
    pDnode->numOfOtherNodes++;
  }

  if (online && pDnode->numOfSupportVnodes > 0) {
    taosArrayPush(pArray, pDnode);
  }
  return true;
}

S
Shengliang Guan 已提交
518
/*
 * Collects the dnodes that can host vnodes into a new array of SDnodeObj
 * elements, leaving out the dnode whose id equals exceptDnodeId.
 *
 * The per-dnode counters are reset first and then refreshed while the array
 * is filled (see mndResetDnodesArrayFp / mndBuildDnodesArrayFp).
 *
 * Returns the array, or NULL with terrno = TSDB_CODE_OUT_OF_MEMORY when the
 * array cannot be created. The caller destroys it (callers in this file use
 * taosArrayDestroy()).
 */
SArray *mndBuildDnodesArray(SMnode *pMnode, int32_t exceptDnodeId) {
  SSdb   *pSdb = pMnode->pSdb;
  int32_t numOfDnodes = mndGetDnodeSize(pMnode);

  SArray *pArray = taosArrayInit(numOfDnodes, sizeof(SDnodeObj));
  if (pArray == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  sdbTraverse(pSdb, SDB_DNODE, mndResetDnodesArrayFp, NULL, NULL, NULL);
  sdbTraverse(pSdb, SDB_DNODE, mndBuildDnodesArrayFp, pArray, &exceptDnodeId, NULL);

  mDebug("build %d dnodes array", (int32_t)taosArrayGetSize(pArray));
  for (int32_t i = 0; i < (int32_t)taosArrayGetSize(pArray); ++i) {
    SDnodeObj *pDnode = taosArrayGet(pArray, i);
    mDebug("dnode:%d, vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes);
  }
  return pArray;
}

sangshuduo's avatar
sangshuduo 已提交
539
// Three-way comparison of two dnode ids: 0 when equal, 1 when the first is
// larger, -1 otherwise.
static int32_t mndCompareDnodeId(int32_t *dnode1Id, int32_t *dnode2Id) {
  if (*dnode1Id == *dnode2Id) {
    return 0;
  }
  return *dnode1Id > *dnode2Id ? 1 : -1;
}
S
Shengliang Guan 已提交
545

546 547 548
/*
 * Load score of a dnode: (vnodes + otherNodes * ratio + additionDnodes)
 * divided by the number of vnodes the dnode supports. Lower means less loaded.
 *
 * NOTE(review): divides by numOfSupportVnodes without a zero check;
 * mndBuildDnodesArrayFp only collects dnodes with numOfSupportVnodes > 0 -
 * confirm every caller passes dnodes from that array.
 */
static float mndGetDnodeScore(SDnodeObj *pDnode, int32_t additionDnodes, float ratio) {
  float totalDnodes = pDnode->numOfVnodes + (float)pDnode->numOfOtherNodes * ratio + additionDnodes;
  return totalDnodes / pDnode->numOfSupportVnodes;
}

S
Shengliang Guan 已提交
551
// Orders two dnodes by mndGetDnodeScore() with ratio 0.9: 0 when the scores
// are equal, 1 when the first scores higher, -1 otherwise.
static int32_t mndCompareDnodeVnodes(SDnodeObj *pDnode1, SDnodeObj *pDnode2) {
  float d1Score = mndGetDnodeScore(pDnode1, 0, 0.9);
  float d2Score = mndGetDnodeScore(pDnode2, 0, 0.9);
  if (d1Score == d2Score) {
    return 0;
  }
  return d1Score > d2Score ? 1 : -1;
}

S
Shengliang Guan 已提交
560 561 562 563 564 565 566 567 568 569
// Sorts the first `replica` entries of the vgroup's vnodeGid array by
// ascending dnodeId, swapping adjacent out-of-order entries pass by pass.
void mndSortVnodeGid(SVgObj *pVgroup) {
  const int32_t count = pVgroup->replica;

  for (int32_t pass = 0; pass < count; ++pass) {
    // After each pass one more entry at the tail is already in place.
    const int32_t unsortedEnd = count - 1 - pass;
    for (int32_t k = 0; k < unsortedEnd; ++k) {
      if (pVgroup->vnodeGid[k].dnodeId <= pVgroup->vnodeGid[k + 1].dnodeId) {
        continue;
      }
      TSWAP(pVgroup->vnodeGid[k], pVgroup->vnodeGid[k + 1]);
    }
  }
}

570
/*
 * Picks dnodes for every replica of pVgroup from pArray.
 *
 * The array is sorted by load score (lowest first) and its first `replica`
 * entries are assigned in order. Each chosen dnode must still have a free
 * vnode slot and enough memory for the vgroup; its memUsed and numOfVnodes in
 * the array are updated so later calls on the same array see the new load.
 * The resulting members are finally ordered by dnodeId.
 *
 * Returns 0 on success. On failure returns -1 with terrno set to
 * TSDB_CODE_MND_NO_ENOUGH_DNODES or TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
 * array entries updated for replicas assigned before the failure are not
 * rolled back.
 */
static int32_t mndGetAvailableDnode(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, SArray *pArray) {
  mDebug("start to sort %d dnodes", (int32_t)taosArrayGetSize(pArray));
  taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
  for (int32_t i = 0; i < (int32_t)taosArrayGetSize(pArray); ++i) {
    SDnodeObj *pDnode = taosArrayGet(pArray, i);
    mDebug("dnode:%d, score:%f", pDnode->id, mndGetDnodeScore(pDnode, 0, 0.9));
  }

  int32_t size = taosArrayGetSize(pArray);
  if (size < pVgroup->replica) {
    mError("db:%s, vgId:%d, no enough online dnodes:%d to alloc %d replica", pVgroup->dbName, pVgroup->vgId, size,
           pVgroup->replica);
    terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
    return -1;
  }

  for (int32_t v = 0; v < pVgroup->replica; ++v) {
    SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
    SDnodeObj *pDnode = taosArrayGet(pArray, v);
    if (pDnode == NULL || pDnode->numOfVnodes >= pDnode->numOfSupportVnodes) {
      terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
      return -1;
    }

    int64_t vgMem = mndGetVgroupMemory(pMnode, pDb, pVgroup);
    if (pDnode->memAvail - vgMem - pDnode->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d, avail:%" PRId64 " used:%" PRId64,
             pVgroup->dbName, pVgroup->vgId, vgMem, pDnode->id, pDnode->memAvail, pDnode->memUsed);
      terrno = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
      return -1;
    } else {
      pDnode->memUsed += vgMem;
    }

    pVgid->dnodeId = pDnode->id;
    // A single-replica group starts as leader; otherwise members start as followers.
    if (pVgroup->replica == 1) {
      pVgid->syncState = TAOS_SYNC_STATE_LEADER;
    } else {
      pVgid->syncState = TAOS_SYNC_STATE_FOLLOWER;
    }

    mInfo("db:%s, vgId:%d, vn:%d is alloced, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64,
          pVgroup->dbName, pVgroup->vgId, v, vgMem, pVgid->dnodeId, pDnode->memAvail, pDnode->memUsed);
    pDnode->numOfVnodes++;
  }

  mndSortVnodeGid(pVgroup);
  return 0;
}
623

S
Shengliang Guan 已提交
624
/*
 * Initializes pVgroup as a single-replica tsma vgroup of pDb and assigns it a
 * dnode.
 *
 * Returns 0 on success, -1 on failure (terrno is set by the failing callee).
 */
int32_t mndAllocSmaVgroup(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup) {
  SArray *pArray = mndBuildDnodesArray(pMnode, 0);
  if (pArray == NULL) return -1;

  pVgroup->vgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP);
  pVgroup->isTsma = 1;
  pVgroup->createdTime = taosGetTimestampMs();
  pVgroup->updateTime = pVgroup->createdTime;
  pVgroup->version = 1;
  memcpy(pVgroup->dbName, pDb->name, TSDB_DB_FNAME_LEN);
  pVgroup->dbUid = pDb->uid;
  pVgroup->replica = 1;

  // The dnode array is only needed for the placement decision, so release it
  // on both the success and the failure path.
  int32_t code = mndGetAvailableDnode(pMnode, pDb, pVgroup, pArray);
  taosArrayDestroy(pArray);
  if (code != 0) return -1;

  mInfo("db:%s, sma vgId:%d is alloced", pDb->name, pVgroup->vgId);
  return 0;
}

S
Shengliang Guan 已提交
644
/*
 * Allocates and initializes the pDb->cfg.numOfVgroups vgroups of a database.
 *
 * The full uint32 hash space is split into equal intervals, with the last
 * vgroup absorbing the remainder up to UINT32_MAX. Each vgroup gets a fresh
 * id and its replicas are placed via mndGetAvailableDnode().
 *
 * On success returns 0 and stores the array in *ppVgroups (the array comes
 * from taosMemoryCalloc(); it is not freed here). On failure returns -1,
 * frees the array and leaves *ppVgroups untouched.
 *
 * NOTE(review): numOfVgroups is used as a divisor without a zero check -
 * confirm it is validated before this call.
 */
int32_t mndAllocVgroup(SMnode *pMnode, SDbObj *pDb, SVgObj **ppVgroups) {
  int32_t code = -1;
  SArray *pArray = NULL;
  SVgObj *pVgroups = NULL;

  pVgroups = taosMemoryCalloc(pDb->cfg.numOfVgroups, sizeof(SVgObj));
  if (pVgroups == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto _OVER;
  }

  pArray = mndBuildDnodesArray(pMnode, 0);
  if (pArray == NULL) goto _OVER;

  mInfo("db:%s, total %d dnodes used to create %d vgroups (%d vnodes)", pDb->name, (int32_t)taosArrayGetSize(pArray),
        pDb->cfg.numOfVgroups, pDb->cfg.numOfVgroups * pDb->cfg.replications);

  int32_t  maxVgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP);
  uint32_t hashMin = 0;
  uint32_t hashMax = UINT32_MAX;
  uint32_t hashInterval = (hashMax - hashMin) / pDb->cfg.numOfVgroups;

  // Vgroup ids handed out here never go below 2.
  if (maxVgId < 2) maxVgId = 2;

  for (uint32_t v = 0; v < pDb->cfg.numOfVgroups; v++) {
    SVgObj *pVgroup = &pVgroups[v];
    pVgroup->vgId = maxVgId++;
    pVgroup->createdTime = taosGetTimestampMs();
    // Use this vgroup's own creation time, not the first array element's.
    pVgroup->updateTime = pVgroup->createdTime;
    pVgroup->version = 1;
    pVgroup->hashBegin = hashMin + hashInterval * v;
    if (v == pDb->cfg.numOfVgroups - 1) {
      // The last vgroup covers whatever the integer division left over.
      pVgroup->hashEnd = hashMax;
    } else {
      pVgroup->hashEnd = hashMin + hashInterval * (v + 1) - 1;
    }

    memcpy(pVgroup->dbName, pDb->name, TSDB_DB_FNAME_LEN);
    pVgroup->dbUid = pDb->uid;
    pVgroup->replica = pDb->cfg.replications;

    if (mndGetAvailableDnode(pMnode, pDb, pVgroup, pArray) != 0) {
      goto _OVER;
    }
  }

  *ppVgroups = pVgroups;
  code = 0;

  mInfo("db:%s, total %d vgroups is alloced, replica:%d", pDb->name, pDb->cfg.numOfVgroups, pDb->cfg.replications);

_OVER:
  if (code != 0) taosMemoryFree(pVgroups);
  taosArrayDestroy(pArray);
  return code;
}
703

L
Liu Jicong 已提交
704
/*
 * Builds the endpoint set of a vgroup from the fqdn/port of each member's
 * dnode. Members whose dnode cannot be acquired are skipped.
 *
 * inUse is set to the current endpoint count just before a leader's endpoint
 * is added (presumably addEpIntoEpSet appends at that index - confirm).
 */
SEpSet mndGetVgroupEpset(SMnode *pMnode, const SVgObj *pVgroup) {
  SEpSet epset = {0};

  for (int32_t v = 0; v < pVgroup->replica; ++v) {
    const SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
    SDnodeObj       *pDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
    if (pDnode == NULL) continue;

    if (pVgid->syncState == TAOS_SYNC_STATE_LEADER) {
      epset.inUse = epset.numOfEps;
    }

    addEpIntoEpSet(&epset, pDnode->fqdn, pDnode->port);
    mndReleaseDnode(pMnode, pDnode);
  }

  return epset;
}

S
Shengliang Guan 已提交
723 724
/*
 * Fill at most `rows` rows of pBlock with one row per vgroup.
 *
 * When pShow->db is non-empty only vgroups whose dbUid matches that database
 * are emitted; if the database cannot be acquired nothing is emitted and 0 is
 * returned. The sdb iterator is kept in pShow->pIter between calls.
 *
 * Columns are written strictly in this order: vgId, db name, numOfTables,
 * four (dnodeId, role) pairs, cacheUsage, numOfCachedTables, isTsma.
 *
 * Returns the number of rows written by this call and adds it to
 * pShow->numOfRows.
 */
static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) {
  SMnode *pMnode = pReq->info.node;
  SSdb   *pSdb = pMnode->pSdb;
  int32_t filled = 0;
  SVgObj *pVg = NULL;
  int64_t nowMs = taosGetTimestampMs();

  SDbObj *pFilterDb = NULL;
  if (strlen(pShow->db) > 0) {
    pFilterDb = mndAcquireDb(pMnode, pShow->db);
    if (pFilterDb == NULL) {
      return 0;
    }
  }

  while (filled < rows) {
    pShow->pIter = sdbFetch(pSdb, SDB_VGROUP, pShow->pIter, (void **)&pVg);
    if (pShow->pIter == NULL) {
      break;
    }

    // skip vgroups that belong to another database
    if (pFilterDb != NULL && pVg->dbUid != pFilterDb->uid) {
      sdbRelease(pSdb, pVg);
      continue;
    }

    int32_t          col = 0;
    SColumnInfoData *pCol = taosArrayGet(pBlock->pDataBlock, col++);
    colDataSetVal(pCol, filled, (const char *)&pVg->vgId, false);

    // strip the account prefix from the full db name
    SName parsed = {0};
    char  dbVar[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
    tNameFromString(&parsed, pVg->dbName, T_NAME_ACCT | T_NAME_DB);
    tNameGetDbName(&parsed, varDataVal(dbVar));
    varDataSetLen(dbVar, strlen(varDataVal(dbVar)));

    pCol = taosArrayGet(pBlock->pDataBlock, col++);
    colDataSetVal(pCol, filled, (const char *)dbVar, false);

    pCol = taosArrayGet(pBlock->pDataBlock, col++);
    colDataSetVal(pCol, filled, (const char *)&pVg->numOfTables, false);

    // default 3 replica, add 1 replica if move vnode
    for (int32_t slot = 0; slot < 4; ++slot) {
      pCol = taosArrayGet(pBlock->pDataBlock, col++);

      if (slot >= pVg->replica) {
        // unused slot: both the dnode id and the role column are NULL
        colDataSetNULL(pCol, filled);
        pCol = taosArrayGet(pBlock->pDataBlock, col++);
        colDataSetNULL(pCol, filled);
        continue;
      }

      const SVnodeGid *pGid = &pVg->vnodeGid[slot];
      int16_t          dnodeId = (int16_t)pGid->dnodeId;
      colDataSetVal(pCol, filled, (const char *)&dnodeId, false);

      bool       exist = false;
      bool       online = false;
      SDnodeObj *pDnode = mndAcquireDnode(pMnode, pGid->dnodeId);
      if (pDnode != NULL) {
        exist = true;
        online = mndIsDnodeOnline(pDnode, nowMs);
        mndReleaseDnode(pMnode, pDnode);
      }

      // role text: "dropping" if the dnode is gone, "offline" if it is not
      // online, otherwise the sync state with an optional "*" / "**" suffix
      char roleVar[20] = {0};
      char role[20] = "offline";
      if (!exist) {
        strcpy(role, "dropping");
      } else if (online) {
        const char *mark = "";
        if (pGid->syncState == TAOS_SYNC_STATE_LEADER && !pGid->syncRestore) {
          mark = pGid->syncCanRead ? "*" : "**";
        }
        snprintf(role, sizeof(role), "%s%s", syncStr(pGid->syncState), mark);
      }

      // `col` still indexes the role column here; it is advanced just below
      STR_WITH_MAXSIZE_TO_VARSTR(roleVar, role, pShow->pMeta->pSchemas[col].bytes);

      pCol = taosArrayGet(pBlock->pDataBlock, col++);
      colDataSetVal(pCol, filled, (const char *)roleVar, false);
    }

    pCol = taosArrayGet(pBlock->pDataBlock, col++);
    int32_t cacheUsage = (int32_t)pVg->cacheUsage;
    colDataSetVal(pCol, filled, (const char *)&cacheUsage, false);

    pCol = taosArrayGet(pBlock->pDataBlock, col++);
    colDataSetVal(pCol, filled, (const char *)&pVg->numOfCachedTables, false);

    pCol = taosArrayGet(pBlock->pDataBlock, col++);
    colDataSetVal(pCol, filled, (const char *)&pVg->isTsma, false);

    // a compact-start-time column was disabled in the original code and is
    // intentionally not written here

    filled++;
    sdbRelease(pSdb, pVg);
  }

  if (pFilterDb != NULL) {
    mndReleaseDb(pMnode, pFilterDb);
  }

  pShow->numOfRows += filled;
  return filled;
}

/* Cancel an in-progress vgroup iteration by handing pIter to sdbCancelFetch. */
static void mndCancelGetNextVgroup(SMnode *pMnode, void *pIter) {
  sdbCancelFetch(pMnode->pSdb, pIter);
}

S
Shengliang Guan 已提交
842 843 844 845
/*
 * sdbTraverse callback: for the vgroup in pObj, add to the int32_t counter
 * behind p2 the number of replicas located on the dnode whose id is stored
 * behind p1. p3 is unused. Always returns true.
 */
static bool mndGetVnodesNumFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) {
  const SVgObj *pVg = pObj;
  const int32_t wantedDnode = *(int32_t *)p1;
  int32_t      *pCounter = (int32_t *)p2;

  int32_t idx = 0;
  while (idx < pVg->replica) {
    if (pVg->vnodeGid[idx].dnodeId == wantedDnode) {
      *pCounter += 1;
    }
    ++idx;
  }

  return true;
}

/* Count the vnodes (vgroup replicas) placed on dnode `dnodeId` by traversing all vgroups. */
int32_t mndGetVnodesNum(SMnode *pMnode, int32_t dnodeId) {
  int32_t total = 0;
  sdbTraverse(pMnode->pSdb, SDB_VGROUP, mndGetVnodesNumFp, &dnodeId, &total, NULL);
  return total;
}

862 863 864 865 866 867
/*
 * Estimate the memory one vnode of pVgroup needs from its database config:
 *   buffer * 1024 * 1024 + pages * pageSize * 1024
 *   (+ cacheLastSize * 1024 * 1024 when cacheLast > 0)
 * NOTE(review): the multipliers suggest buffer/cacheLastSize are in MiB and
 * pageSize in KiB, giving a result in bytes - confirm against the cfg docs.
 *
 * If pDbInput is NULL the database is looked up by pVgroup->dbName and
 * released again before returning. Returns 0 when no database is available.
 */
int64_t mndGetVgroupMemory(SMnode *pMnode, SDbObj *pDbInput, SVgObj *pVgroup) {
  const bool acquiredHere = (pDbInput == NULL);
  SDbObj    *pDb = acquiredHere ? mndAcquireDb(pMnode, pVgroup->dbName) : pDbInput;

  int64_t required = 0;
  if (pDb != NULL) {
    required = (int64_t)pDb->cfg.buffer * 1024 * 1024;
    required += (int64_t)pDb->cfg.pages * pDb->cfg.pageSize * 1024;
    if (pDb->cfg.cacheLast > 0) {
      required += (int64_t)pDb->cfg.cacheLastSize * 1024 * 1024;
    }
  }

  if (acquiredHere) {
    mndReleaseDb(pMnode, pDb);
  }
  return required;
}

/*
 * sdbTraverse callback: for every replica of the vgroup in pObj that sits on
 * the dnode whose id is behind p1, add mndGetVgroupMemory() of that vgroup to
 * the int64_t accumulator behind p2. p3 is unused. Always returns true.
 */
static bool mndGetVnodeMemroyFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) {
  SVgObj       *pVg = pObj;
  const int32_t wantedDnode = *(int32_t *)p1;
  int64_t      *pTotal = (int64_t *)p2;

  int32_t idx = 0;
  while (idx < pVg->replica) {
    if (pVg->vnodeGid[idx].dnodeId == wantedDnode) {
      *pTotal += mndGetVgroupMemory(pMnode, NULL, pVg);
    }
    ++idx;
  }

  return true;
}

/* Sum the estimated memory of all vnodes placed on dnode `dnodeId` by traversing all vgroups. */
int64_t mndGetVnodesMemory(SMnode *pMnode, int32_t dnodeId) {
  int64_t total = 0;
  sdbTraverse(pMnode->pSdb, SDB_VGROUP, mndGetVnodeMemroyFp, &dnodeId, &total, NULL);
  return total;
}

S
Shengliang Guan 已提交
902 903
/*
 * Fill at most `rows` rows of pBlock with one row per vnode (vgroup replica).
 *
 * Columns are written in this order: vgId, replica count, sync state,
 * db name, dnode id, dnode endpoint. The db name and the endpoint fall back
 * to the literal text "NULL" when they cannot be resolved.
 *
 * The sdb iterator is kept in pShow->pIter between calls. Returns the number
 * of rows written by this call and adds it to pShow->numOfRows.
 *
 * NOTE(review): when `rows` is reached in the middle of a vgroup, the
 * remaining replicas of that vgroup are not written by this call; whether a
 * later call revisits them depends on sdbFetch - confirm.
 */
static int32_t mndRetrieveVnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) {
  SMnode *pMnode = pReq->info.node;
  SSdb   *pSdb = pMnode->pSdb;
  int32_t numOfRows = 0;
  SVgObj *pVgroup = NULL;
  int32_t cols = 0;

  while (numOfRows < rows) {
    pShow->pIter = sdbFetch(pSdb, SDB_VGROUP, pShow->pIter, (void **)&pVgroup);
    if (pShow->pIter == NULL) break;

    for (int32_t i = 0; i < pVgroup->replica && numOfRows < rows; ++i) {
      SVnodeGid       *pVgid = &pVgroup->vnodeGid[i];
      SColumnInfoData *pColInfo = NULL;
      cols = 0;

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)&pVgroup->vgId, false);

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)&pVgroup->replica, false);

      char buf[20] = {0};
      STR_TO_VARSTR(buf, syncStr(pVgid->syncState));
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)buf, false);

      const char *dbname = mndGetDbStr(pVgroup->dbName);
      char        b1[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
      if (dbname != NULL) {
        STR_WITH_MAXSIZE_TO_VARSTR(b1, dbname, TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE);
      } else {
        STR_WITH_MAXSIZE_TO_VARSTR(b1, "NULL", TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE);
      }
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)b1, false);

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)&pVgid->dnodeId, false);

      SDnodeObj *pDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
      char       b2[TSDB_EP_LEN + VARSTR_HEADER_SIZE] = {0};
      if (pDnode != NULL) {
        STR_WITH_MAXSIZE_TO_VARSTR(b2, pDnode->ep, TSDB_EP_LEN + VARSTR_HEADER_SIZE);
        // the endpoint has been copied into b2; give the reference back so
        // the acquire above is balanced
        mndReleaseDnode(pMnode, pDnode);
      } else {
        STR_WITH_MAXSIZE_TO_VARSTR(b2, "NULL", TSDB_EP_LEN + VARSTR_HEADER_SIZE);
      }
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      colDataSetVal(pColInfo, numOfRows, (const char *)b2, false);

      numOfRows++;
    }

    sdbRelease(pSdb, pVgroup);
  }

  pShow->numOfRows += numOfRows;
  return numOfRows;
}

/* Cancel an in-progress vnode listing by handing pIter to sdbCancelFetch. */
static void mndCancelGetNextVnode(SMnode *pMnode, void *pIter) {
  sdbCancelFetch(pMnode->pSdb, pIter);
}
S
Shengliang Guan 已提交
966

967
/*
 * Add one replica to pVgroup on the first dnode of pArray (after sorting with
 * mndCompareDnodeVnodes) that does not already host a replica of this vgroup,
 * and append the updated vgroup to the redo log of pTrans.
 *
 * The candidate dnode must have a free vnode slot and enough memory; if the
 * first unused candidate fails either check the function fails immediately
 * without trying further candidates.
 *
 * On success pVgroup->replica, the dnode's numOfVnodes and memUsed are all
 * increased. Returns 0 on success, -1 with terrno set on failure.
 *
 * NOTE(review): vnodeGid[pVgroup->replica] is written without a bound check;
 * callers are assumed to leave room for one more replica - confirm.
 */
static int32_t mndAddVnodeToVgroup(SMnode *pMnode, STrans *pTrans, SVgObj *pVgroup, SArray *pArray) {
  taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
  for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) {
    SDnodeObj *pDnode = taosArrayGet(pArray, i);
    if (pDnode == NULL) continue;
    mInfo("dnode:%d, equivalent vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes);
  }

  SVnodeGid *pVgid = &pVgroup->vnodeGid[pVgroup->replica];
  for (int32_t d = 0; d < taosArrayGetSize(pArray); ++d) {
    SDnodeObj *pDnode = taosArrayGet(pArray, d);
    // validate the entry before its first dereference below
    if (pDnode == NULL) {
      terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
      return -1;
    }

    bool used = false;
    for (int32_t vn = 0; vn < pVgroup->replica; ++vn) {
      if (pDnode->id == pVgroup->vnodeGid[vn].dnodeId) {
        used = true;
        break;
      }
    }
    if (used) continue;

    if (pDnode->numOfVnodes >= pDnode->numOfSupportVnodes) {
      terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
      return -1;
    }

    int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
    if (pDnode->memAvail - vgMem - pDnode->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d avail:%" PRId64 " used:%" PRId64,
             pVgroup->dbName, pVgroup->vgId, vgMem, pDnode->id, pDnode->memAvail, pDnode->memUsed);
      terrno = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
      return -1;
    } else {
      pDnode->memUsed += vgMem;
    }

    pVgid->dnodeId = pDnode->id;
    pVgid->syncState = TAOS_SYNC_STATE_OFFLINE;
    mInfo("db:%s, vgId:%d, vn:%d is added, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64,
          pVgroup->dbName, pVgroup->vgId, pVgroup->replica, vgMem, pVgid->dnodeId, pDnode->memAvail, pDnode->memUsed);

    pVgroup->replica++;
    pDnode->numOfVnodes++;

    SSdbRaw *pVgRaw = mndVgroupActionEncode(pVgroup);
    if (pVgRaw == NULL) return -1;
    if (mndTransAppendRedolog(pTrans, pVgRaw) != 0) {
      sdbFreeRaw(pVgRaw);
      return -1;
    }
    (void)sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);

    return 0;
  }

  terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
  mError("db:%s, failed to add vnode to vgId:%d since %s", pVgroup->dbName, pVgroup->vgId, terrstr());
  return -1;
}

1026 1027
/*
 * Remove one replica from pVgroup and append the updated vgroup to the redo
 * log of pTrans.
 *
 * pArray is sorted with mndCompareDnodeVnodes and walked from its last entry
 * towards the first; the first dnode found that hosts a replica of this
 * vgroup loses it. The removed entry is copied to *pDelVgid, the last replica
 * slot is moved into the hole and the vacated slot is zeroed.
 *
 * Returns 0 on success. Returns -1 with terrno = TSDB_CODE_APP_ERROR when no
 * dnode in pArray hosts a replica, or -1 when encoding/appending fails.
 */
static int32_t mndRemoveVnodeFromVgroup(SMnode *pMnode, STrans *pTrans, SVgObj *pVgroup, SArray *pArray,
                                        SVnodeGid *pDelVgid) {
  taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
  for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) {
    SDnodeObj *pEntry = taosArrayGet(pArray, i);
    mInfo("dnode:%d, equivalent vnodes:%d others:%d", pEntry->id, pEntry->numOfVnodes, pEntry->numOfOtherNodes);
  }

  bool removed = false;
  for (int32_t d = taosArrayGetSize(pArray) - 1; d >= 0 && !removed; --d) {
    SDnodeObj *pHost = taosArrayGet(pArray, d);

    for (int32_t vn = 0; vn < pVgroup->replica; ++vn) {
      SVnodeGid *pSlot = &pVgroup->vnodeGid[vn];
      if (pSlot->dnodeId != pHost->id) {
        continue;
      }

      int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
      pHost->memUsed -= vgMem;
      mInfo("db:%s, vgId:%d, vn:%d is removed, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64,
            pVgroup->dbName, pVgroup->vgId, vn, vgMem, pSlot->dnodeId, pHost->memAvail, pHost->memUsed);
      pHost->numOfVnodes--;
      pVgroup->replica--;

      // hand the removed entry to the caller, then compact the array
      *pDelVgid = *pSlot;
      *pSlot = pVgroup->vnodeGid[pVgroup->replica];
      memset(&pVgroup->vnodeGid[pVgroup->replica], 0, sizeof(SVnodeGid));

      removed = true;
      break;
    }
  }

  if (!removed) {
    terrno = TSDB_CODE_APP_ERROR;
    mError("db:%s, failed to remove vnode from vgId:%d since %s", pVgroup->dbName, pVgroup->vgId, terrstr());
    return -1;
  }

  for (int32_t vn = 0; vn < pVgroup->replica; ++vn) {
    SVnodeGid *pKept = &pVgroup->vnodeGid[vn];
    mInfo("db:%s, vgId:%d, vn:%d dnode:%d is reserved", pVgroup->dbName, pVgroup->vgId, vn, pKept->dnodeId);
  }

  SSdbRaw *pRedoRaw = mndVgroupActionEncode(pVgroup);
  if (pRedoRaw == NULL) {
    return -1;
  }
  if (mndTransAppendRedolog(pTrans, pRedoRaw) != 0) {
    sdbFreeRaw(pRedoRaw);
    return -1;
  }
  (void)sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY);

  return 0;
}

S
Shengliang Guan 已提交
1079
/*
 * Append a TDMT_DND_CREATE_VNODE redo action to pTrans, addressed to the
 * dnode named by pVgid->dnodeId. TSDB_CODE_VND_ALREADY_EXIST is registered as
 * an acceptable result code.
 *
 * Returns 0 on success, -1 when the dnode cannot be acquired, the request
 * cannot be built, or the action cannot be appended (the request buffer is
 * freed in that last case).
 */
int32_t mndAddCreateVnodeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SVnodeGid *pVgid) {
  STransAction action = {0};

  SDnodeObj *pDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
  if (pDnode == NULL) return -1;
  action.epSet = mndGetDnodeEpset(pDnode);

  // pDnode is passed to the request builder, so keep the reference until the
  // request has been built instead of releasing it first
  int32_t contLen = 0;
  void   *pReq = mndBuildCreateVnodeReq(pMnode, pDnode, pDb, pVgroup, &contLen);
  mndReleaseDnode(pMnode, pDnode);
  if (pReq == NULL) return -1;

  action.pCont = pReq;
  action.contLen = contLen;
  action.msgType = TDMT_DND_CREATE_VNODE;
  action.acceptableCode = TSDB_CODE_VND_ALREADY_EXIST;

  if (mndTransAppendRedoAction(pTrans, &action) != 0) {
    taosMemoryFree(pReq);
    return -1;
  }

  return 0;
}

/*
 * Append a TDMT_VND_ALTER_CONFIRM redo action to pTrans, addressed to the
 * endpoint set of pVgroup. The message body is a bare SMsgHead carrying the
 * length and vgId in network byte order.
 *
 * Returns 0 on success, -1 on allocation failure (terrno is set to
 * TSDB_CODE_OUT_OF_MEMORY) or when the action cannot be appended.
 */
int32_t mndAddAlterVnodeConfirmAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup) {
  STransAction confirm = {0};
  confirm.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  mInfo("vgId:%d, build alter vnode confirm req", pVgroup->vgId);

  int32_t   headLen = sizeof(SMsgHead);
  SMsgHead *pMsg = taosMemoryMalloc(headLen);
  if (pMsg == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  pMsg->contLen = htonl(headLen);
  pMsg->vgId = htonl(pVgroup->vgId);

  confirm.pCont = pMsg;
  confirm.contLen = headLen;
  confirm.msgType = TDMT_VND_ALTER_CONFIRM;
  // an incorrect redirect result will cause this error, so it is retried
  confirm.retryCode = TSDB_CODE_VND_INVALID_VGROUP_ID;

  if (mndTransAppendRedoAction(pTrans, &confirm) == 0) {
    return 0;
  }

  taosMemoryFree(pMsg);
  return -1;
}

1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151
/*
 * Append a TDMT_VND_ALTER_HASHRANGE redo action to pTrans, addressed to the
 * endpoint set of pVgroup, with a body built by mndBuildAlterVnodeHashRangeReq.
 *
 * Returns 0 on success, -1 when the request cannot be built or the action
 * cannot be appended (the request buffer is freed in the latter case).
 */
static int32_t mndAddAlterVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, SVgObj *pVgroup, int32_t dstVgId) {
  STransAction alter = {0};
  alter.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  int32_t bodyLen = 0;
  void   *pBody = mndBuildAlterVnodeHashRangeReq(pMnode, pVgroup, dstVgId, &bodyLen);
  if (pBody == NULL) {
    return -1;
  }

  alter.pCont = pBody;
  alter.contLen = bodyLen;
  alter.msgType = TDMT_VND_ALTER_HASHRANGE;

  if (mndTransAppendRedoAction(pTrans, &alter) == 0) {
    return 0;
  }

  taosMemoryFree(pBody);
  return -1;
}
S
Shengliang Guan 已提交
1152 1153

/*
 * Append a TDMT_VND_ALTER_CONFIG redo action to pTrans, addressed to the
 * endpoint set of pVgroup, with a body built by mndBuildAlterVnodeConfigReq.
 *
 * Returns 0 on success, -1 when the request cannot be built or the action
 * cannot be appended (the request buffer is freed in the latter case).
 */
int32_t mndAddAlterVnodeConfigAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup) {
  STransAction alter = {0};
  alter.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  int32_t bodyLen = 0;
  void   *pBody = mndBuildAlterVnodeConfigReq(pMnode, pDb, pVgroup, &bodyLen);
  if (pBody == NULL) {
    return -1;
  }

  alter.pCont = pBody;
  alter.contLen = bodyLen;
  alter.msgType = TDMT_VND_ALTER_CONFIG;

  if (mndTransAppendRedoAction(pTrans, &alter) == 0) {
    return 0;
  }

  taosMemoryFree(pBody);
  return -1;
}

/*
 * Append a TDMT_VND_ALTER_REPLICA redo action to pTrans, addressed to dnode
 * `dnodeId`, with a body built by mndBuildAlterVnodeReplicaReq.
 *
 * Returns 0 on success, -1 when the dnode cannot be acquired, the request
 * cannot be built, or the action cannot be appended (the request buffer is
 * freed in that last case).
 */
int32_t mndAddAlterVnodeReplicaAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId) {
  SDnodeObj *pTarget = mndAcquireDnode(pMnode, dnodeId);
  if (pTarget == NULL) {
    return -1;
  }

  // only the endpoint set is needed from the dnode object
  STransAction alter = {0};
  alter.epSet = mndGetDnodeEpset(pTarget);
  mndReleaseDnode(pMnode, pTarget);

  int32_t bodyLen = 0;
  void   *pBody = mndBuildAlterVnodeReplicaReq(pMnode, pDb, pVgroup, dnodeId, &bodyLen);
  if (pBody == NULL) {
    return -1;
  }

  alter.pCont = pBody;
  alter.contLen = bodyLen;
  alter.msgType = TDMT_VND_ALTER_REPLICA;

  if (mndTransAppendRedoAction(pTrans, &alter) == 0) {
    return 0;
  }

  taosMemoryFree(pBody);
  return -1;
}

X
Xiaoyu Wang 已提交
1197 1198
/*
 * Append a TDMT_VND_DISABLE_WRITE redo action to pTrans, addressed to dnode
 * `dnodeId`, with a body built by mndBuildDisableVnodeWriteReq for
 * pVgroup->vgId.
 *
 * Returns 0 on success, -1 when the dnode cannot be acquired, the request
 * cannot be built, or the action cannot be appended (the request buffer is
 * freed in that last case).
 */
static int32_t mndAddDisableVnodeWriteAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
                                             int32_t dnodeId) {
  SDnodeObj *pTarget = mndAcquireDnode(pMnode, dnodeId);
  if (pTarget == NULL) {
    return -1;
  }

  // only the endpoint set is needed from the dnode object
  STransAction disable = {0};
  disable.epSet = mndGetDnodeEpset(pTarget);
  mndReleaseDnode(pMnode, pTarget);

  int32_t bodyLen = 0;
  void   *pBody = mndBuildDisableVnodeWriteReq(pMnode, pDb, pVgroup->vgId, &bodyLen);
  if (pBody == NULL) {
    return -1;
  }

  disable.pCont = pBody;
  disable.contLen = bodyLen;
  disable.msgType = TDMT_VND_DISABLE_WRITE;

  if (mndTransAppendRedoAction(pTrans, &disable) == 0) {
    return 0;
  }

  taosMemoryFree(pBody);
  return -1;
}

S
Shengliang Guan 已提交
1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237
/*
 * Append a TDMT_DND_DROP_VNODE action to pTrans, addressed to the dnode named
 * by pVgid->dnodeId. The action goes to the redo list when isRedo is true and
 * to the undo list otherwise. TSDB_CODE_VND_NOT_EXIST is registered as an
 * acceptable result code.
 *
 * Returns 0 on success, -1 when the dnode cannot be acquired, the request
 * cannot be built, or the action cannot be appended (the request buffer is
 * freed in that last case).
 */
int32_t mndAddDropVnodeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SVnodeGid *pVgid,
                              bool isRedo) {
  STransAction action = {0};

  SDnodeObj *pDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
  if (pDnode == NULL) return -1;
  action.epSet = mndGetDnodeEpset(pDnode);

  // pDnode is passed to the request builder, so keep the reference until the
  // request has been built instead of releasing it first
  int32_t contLen = 0;
  void   *pReq = mndBuildDropVnodeReq(pMnode, pDnode, pDb, pVgroup, &contLen);
  mndReleaseDnode(pMnode, pDnode);
  if (pReq == NULL) return -1;

  action.pCont = pReq;
  action.contLen = contLen;
  action.msgType = TDMT_DND_DROP_VNODE;
  action.acceptableCode = TSDB_CODE_VND_NOT_EXIST;

  if (isRedo) {
    if (mndTransAppendRedoAction(pTrans, &action) != 0) {
      taosMemoryFree(pReq);
      return -1;
    }
  } else {
    if (mndTransAppendUndoAction(pTrans, &action) != 0) {
      taosMemoryFree(pReq);
      return -1;
    }
  }

  return 0;
}

int32_t mndSetMoveVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t vnIndex,
S
Shengliang Guan 已提交
1256
                                    SArray *pArray, bool force) {
S
Shengliang Guan 已提交
1257 1258
  SVgObj newVg = {0};
  memcpy(&newVg, pVgroup, sizeof(SVgObj));
S
Shengliang Guan 已提交
1259

S
Shengliang Guan 已提交
1260 1261 1262
  mInfo("vgId:%d, vgroup info before move, replica:%d", newVg.vgId, newVg.replica);
  for (int32_t i = 0; i < newVg.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg.vgId, i, newVg.vnodeGid[i].dnodeId);
S
Shengliang Guan 已提交
1263 1264
  }

S
Shengliang Guan 已提交
1265
  if (!force) {
1266 1267 1268
#if 1
    {
#else
1269
    if (newVg.replica == 1) {
1270 1271
#endif
      mInfo("vgId:%d, will add 1 vnode, replca:%d", pVgroup->vgId, newVg.replica);
1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291
      if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray) != 0) return -1;
      for (int32_t i = 0; i < newVg.replica - 1; ++i) {
        if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId) != 0) return -1;
      }
      if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg, &newVg.vnodeGid[newVg.replica - 1]) != 0) return -1;
      if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg) != 0) return -1;

      mInfo("vgId:%d, will remove 1 vnode, replca:2", pVgroup->vgId);
      newVg.replica--;
      SVnodeGid del = newVg.vnodeGid[vnIndex];
      newVg.vnodeGid[vnIndex] = newVg.vnodeGid[newVg.replica];
      memset(&newVg.vnodeGid[newVg.replica], 0, sizeof(SVnodeGid));
      {
        SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
        if (pRaw == NULL) return -1;
        if (mndTransAppendRedolog(pTrans, pRaw) != 0) {
          sdbFreeRaw(pRaw);
          return -1;
        }
        (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
1292 1293
      }

1294 1295 1296 1297 1298
      if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVg, &del, true) != 0) return -1;
      for (int32_t i = 0; i < newVg.replica; ++i) {
        if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId) != 0) return -1;
      }
      if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg) != 0) return -1;
1299 1300 1301
#if 1
    }
#else
1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320
    } else {  // new replica == 3
      mInfo("vgId:%d, will add 1 vnode, replca:3", pVgroup->vgId);
      if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray) != 0) return -1;
      mInfo("vgId:%d, will remove 1 vnode, replca:4", pVgroup->vgId);
      newVg.replica--;
      SVnodeGid del = newVg.vnodeGid[vnIndex];
      newVg.vnodeGid[vnIndex] = newVg.vnodeGid[newVg.replica];
      memset(&newVg.vnodeGid[newVg.replica], 0, sizeof(SVnodeGid));
      {
        SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
        if (pRaw == NULL) return -1;
        if (mndTransAppendRedolog(pTrans, pRaw) != 0) {
          sdbFreeRaw(pRaw);
          return -1;
        }
        (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
      }

      if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVg, &del, true) != 0) return -1;
1321
      for (int32_t i = 0; i < newVg.replica; ++i) {
1322 1323 1324 1325 1326
        if (i == vnIndex) continue;
        if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId) != 0) return -1;
      }
      if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg, &newVg.vnodeGid[vnIndex]) != 0) return -1;
      if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg) != 0) return -1;
S
Shengliang Guan 已提交
1327
    }
1328
#endif
S
Shengliang Guan 已提交
1329 1330
  } else {
    mInfo("vgId:%d, will add 1 vnode and force remove 1 vnode", pVgroup->vgId);
1331
    if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray) != 0) return -1;
S
Shengliang Guan 已提交
1332 1333 1334 1335
    newVg.replica--;
    SVnodeGid del = newVg.vnodeGid[vnIndex];
    newVg.vnodeGid[vnIndex] = newVg.vnodeGid[newVg.replica];
    memset(&newVg.vnodeGid[newVg.replica], 0, sizeof(SVnodeGid));
1336 1337 1338 1339 1340 1341 1342 1343 1344
    {
      SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
      if (pRaw == NULL) return -1;
      if (mndTransAppendRedolog(pTrans, pRaw) != 0) {
        sdbFreeRaw(pRaw);
        return -1;
      }
      (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
    }
S
Shengliang Guan 已提交
1345 1346 1347 1348 1349 1350

    for (int32_t i = 0; i < newVg.replica; ++i) {
      if (i != vnIndex) {
        if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId) != 0) return -1;
      }
    }
1351
    if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg, &newVg.vnodeGid[vnIndex]) != 0) return -1;
S
Shengliang Guan 已提交
1352 1353 1354
    if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg) != 0) return -1;

    if (newVg.replica == 1) {
S
Shengliang Guan 已提交
1355
      mInfo("vgId:%d, all data is dropped since replica=1", pVgroup->vgId);
S
Shengliang Guan 已提交
1356
    }
S
Shengliang Guan 已提交
1357
  }
S
Shengliang Guan 已提交
1358

1359 1360
  {
    SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
1361 1362 1363 1364 1365
    if (pRaw == NULL) return -1;
    if (mndTransAppendCommitlog(pTrans, pRaw) != 0) {
      sdbFreeRaw(pRaw);
      return -1;
    }
S
Shengliang Guan 已提交
1366
    (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
1367
  }
S
Shengliang Guan 已提交
1368

S
Shengliang Guan 已提交
1369 1370 1371
  mInfo("vgId:%d, vgroup info after move, replica:%d", newVg.vgId, newVg.replica);
  for (int32_t i = 0; i < newVg.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg.vgId, i, newVg.vnodeGid[i].dnodeId);
S
Shengliang Guan 已提交
1372 1373 1374 1375
  }
  return 0;
}

S
Shengliang Guan 已提交
1376
/*
 * For every vgroup that has a replica on dnode delDnodeId, append to pTrans
 * the steps that move that replica elsewhere (see
 * mndSetMoveVgroupInfoToTrans). Candidate target dnodes come from
 * mndBuildDnodesArray(pMnode, delDnodeId).
 *
 * Iteration stops at the first vgroup that fails. Returns 0 when all affected
 * vgroups were handled, -1 when the dnode array cannot be built or a
 * database cannot be acquired, otherwise the failing step's return code.
 */
int32_t mndSetMoveVgroupsInfoToTrans(SMnode *pMnode, STrans *pTrans, int32_t delDnodeId, bool force) {
  int32_t code = 0;
  SArray *pArray = mndBuildDnodesArray(pMnode, delDnodeId);
  if (pArray == NULL) return -1;

  void *pIter = NULL;
  while (1) {
    SVgObj *pVgroup = NULL;
    pIter = sdbFetch(pMnode->pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;

    // locate the replica of this vgroup that lives on the dnode being vacated
    int32_t vnIndex = -1;
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
      if (pVgroup->vnodeGid[i].dnodeId == delDnodeId) {
        vnIndex = i;
        break;
      }
    }

    code = 0;
    if (vnIndex != -1) {
      mInfo("vgId:%d, vnode:%d will be removed from dnode:%d, force:%d", pVgroup->vgId, vnIndex, delDnodeId, force);
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb == NULL) {
        // do not hand a NULL database to the request builders; treat it as a
        // failure of this vgroup (terrno is presumably set by mndAcquireDb)
        mError("vgId:%d, failed to acquire db:%s while moving vnode from dnode:%d", pVgroup->vgId, pVgroup->dbName,
               delDnodeId);
        code = -1;
      } else {
        code = mndSetMoveVgroupInfoToTrans(pMnode, pTrans, pDb, pVgroup, vnIndex, pArray, force);
        mndReleaseDb(pMnode, pDb);
      }
    }

    sdbRelease(pMnode->pSdb, pVgroup);

    if (code != 0) {
      // leaving the loop early: the iterator must be cancelled explicitly
      sdbCancelFetch(pMnode->pSdb, pIter);
      break;
    }
  }

  taosArrayDestroy(pArray);
  return code;
}

/*
 * Grow pVgroup by one replica on dnode newDnodeId and append the matching
 * steps to pTrans: a redo log with the updated vgroup, an alter-replica
 * action for every pre-existing replica, a create-vnode action for the new
 * one, and a final confirm action.
 *
 * pVgroup is modified in place. Returns 0 on success, -1 on the first
 * failing step.
 *
 * NOTE(review): vnodeGid[pVgroup->replica] is written without a bound check;
 * callers are assumed to leave room for one more replica - confirm.
 */
static int32_t mndAddIncVgroupReplicaToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
                                             int32_t newDnodeId) {
  mInfo("vgId:%d, will add 1 vnode, replica:%d dnode:%d", pVgroup->vgId, pVgroup->replica, newDnodeId);

  // the new replica takes the first free slot; existing ones are [0, slot)
  const int32_t slot = pVgroup->replica;
  SVnodeGid    *pAdded = &pVgroup->vnodeGid[slot];
  pVgroup->replica = slot + 1;
  pAdded->dnodeId = newDnodeId;
  pAdded->syncState = TAOS_SYNC_STATE_OFFLINE;

  SSdbRaw *pRedoRaw = mndVgroupActionEncode(pVgroup);
  if (pRedoRaw == NULL) {
    return -1;
  }
  if (mndTransAppendRedolog(pTrans, pRedoRaw) != 0) {
    sdbFreeRaw(pRedoRaw);
    return -1;
  }
  (void)sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY);

  for (int32_t r = 0; r < pVgroup->replica - 1; ++r) {
    if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, pVgroup, pVgroup->vnodeGid[r].dnodeId) != 0) {
      return -1;
    }
  }
  if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, pVgroup, pAdded) != 0) {
    return -1;
  }
  if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, pVgroup) != 0) {
    return -1;
  }

  return 0;
}

/*
 * Shrink pVgroup by removing its replica on dnode delDnodeId and append the
 * matching steps to pTrans: a redo log with the updated vgroup, a drop-vnode
 * action for the removed replica, an alter-replica action for every
 * remaining replica, and a final confirm action.
 *
 * pVgroup is modified in place: the last replica slot is moved into the hole
 * and the vacated slot is zeroed. Returns 0 on success (also when the vgroup
 * has no replica on delDnodeId, in which case nothing is changed), -1 on the
 * first failing step.
 */
static int32_t mndAddDecVgroupReplicaFromTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
                                               int32_t delDnodeId) {
  mInfo("vgId:%d, will remove 1 vnode, replica:%d dnode:%d", pVgroup->vgId, pVgroup->replica, delDnodeId);

  SVnodeGid *pGid = NULL;
  SVnodeGid  delGid = {0};
  for (int32_t i = 0; i < pVgroup->replica; ++i) {
    if (pVgroup->vnodeGid[i].dnodeId == delDnodeId) {
      pGid = &pVgroup->vnodeGid[i];
      break;
    }
  }

  if (pGid == NULL) return 0;

  pVgroup->replica--;
  // Struct assignment instead of memcpy: when the removed replica is the last
  // slot, source and destination are the same object, which memcpy does not
  // permit but assignment does.
  delGid = *pGid;
  *pGid = pVgroup->vnodeGid[pVgroup->replica];
  memset(&pVgroup->vnodeGid[pVgroup->replica], 0, sizeof(SVnodeGid));

  SSdbRaw *pVgRaw = mndVgroupActionEncode(pVgroup);
  if (pVgRaw == NULL) return -1;
  if (mndTransAppendRedolog(pTrans, pVgRaw) != 0) {
    sdbFreeRaw(pVgRaw);
    return -1;
  }
  (void)sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);

  if (mndAddDropVnodeAction(pMnode, pTrans, pDb, pVgroup, &delGid, true) != 0) return -1;
  for (int32_t i = 0; i < pVgroup->replica; ++i) {
    if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, pVgroup, pVgroup->vnodeGid[i].dnodeId) != 0) return -1;
  }
  if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, pVgroup) != 0) return -1;

  return 0;
}

static int32_t mndRedistributeVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup, SDnodeObj *pNew1,
                                     SDnodeObj *pOld1, SDnodeObj *pNew2, SDnodeObj *pOld2, SDnodeObj *pNew3,
                                     SDnodeObj *pOld3) {
1481 1482
  int32_t code = -1;
  STrans *pTrans = NULL;
S
Shengliang Guan 已提交
1483

1484
  pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "red-vgroup");
S
Shengliang Guan 已提交
1485 1486
  if (pTrans == NULL) goto _OVER;
  mndTransSetSerial(pTrans);
1487
  mInfo("trans:%d, used to redistribute vgroup, vgId:%d", pTrans->id, pVgroup->vgId);
S
Shengliang Guan 已提交
1488 1489 1490 1491 1492

  SVgObj newVg = {0};
  memcpy(&newVg, pVgroup, sizeof(SVgObj));
  mInfo("vgId:%d, vgroup info before redistribute, replica:%d", newVg.vgId, newVg.replica);
  for (int32_t i = 0; i < newVg.replica; ++i) {
S
Shengliang Guan 已提交
1493
    mInfo("vgId:%d, vnode:%d dnode:%d role:%s", newVg.vgId, i, newVg.vnodeGid[i].dnodeId,
1494
          syncStr(newVg.vnodeGid[i].syncState));
S
Shengliang Guan 已提交
1495 1496
  }

S
Shengliang Guan 已提交
1497
  if (pNew1 != NULL && pOld1 != NULL) {
1498 1499 1500 1501 1502 1503 1504
    int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew1->id);
    if (numOfVnodes >= pNew1->numOfSupportVnodes) {
      mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew1->id, numOfVnodes,
             pNew1->numOfSupportVnodes);
      terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
      goto _OVER;
    }
1505 1506 1507 1508 1509 1510

    int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
    if (pNew1->memAvail - vgMem - pNew1->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d avail:%" PRId64 " used:%" PRId64,
             pVgroup->dbName, pVgroup->vgId, vgMem, pNew1->id, pNew1->memAvail, pNew1->memUsed);
      terrno = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
S
Shengliang Guan 已提交
1511
      goto _OVER;
1512 1513 1514 1515
    } else {
      pNew1->memUsed += vgMem;
    }

1516 1517 1518
    if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew1->id) != 0) goto _OVER;
    if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld1->id) != 0) goto _OVER;
  }
S
Shengliang Guan 已提交
1519 1520

  if (pNew2 != NULL && pOld2 != NULL) {
1521 1522 1523 1524 1525 1526 1527
    int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew2->id);
    if (numOfVnodes >= pNew2->numOfSupportVnodes) {
      mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew2->id, numOfVnodes,
             pNew2->numOfSupportVnodes);
      terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
      goto _OVER;
    }
1528 1529 1530 1531 1532
    int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
    if (pNew2->memAvail - vgMem - pNew2->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d avail:%" PRId64 " used:%" PRId64,
             pVgroup->dbName, pVgroup->vgId, vgMem, pNew2->id, pNew2->memAvail, pNew2->memUsed);
      terrno = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
S
Shengliang Guan 已提交
1533
      goto _OVER;
1534 1535 1536
    } else {
      pNew2->memUsed += vgMem;
    }
S
Shengliang Guan 已提交
1537 1538
    if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew2->id) != 0) goto _OVER;
    if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld2->id) != 0) goto _OVER;
1539
  }
S
Shengliang Guan 已提交
1540 1541

  if (pNew3 != NULL && pOld3 != NULL) {
1542 1543 1544 1545 1546 1547 1548
    int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew3->id);
    if (numOfVnodes >= pNew3->numOfSupportVnodes) {
      mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew3->id, numOfVnodes,
             pNew3->numOfSupportVnodes);
      terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
      goto _OVER;
    }
1549 1550 1551 1552 1553
    int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
    if (pNew3->memAvail - vgMem - pNew3->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d avail:%" PRId64 " used:%" PRId64,
             pVgroup->dbName, pVgroup->vgId, vgMem, pNew3->id, pNew3->memAvail, pNew3->memUsed);
      terrno = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
1554
      goto _OVER;
1555 1556 1557
    } else {
      pNew3->memUsed += vgMem;
    }
S
Shengliang Guan 已提交
1558 1559 1560 1561
    if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew3->id) != 0) goto _OVER;
    if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld3->id) != 0) goto _OVER;
  }

1562
  {
1563
    SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
S
Shengliang Guan 已提交
1564
    if (pRaw == NULL) goto _OVER;
1565 1566
    if (mndTransAppendCommitlog(pTrans, pRaw) != 0) {
      sdbFreeRaw(pRaw);
S
Shengliang Guan 已提交
1567
      goto _OVER;
1568
    }
S
Shengliang Guan 已提交
1569
    (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
1570
  }
S
Shengliang Guan 已提交
1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586

  mInfo("vgId:%d, vgroup info after redistribute, replica:%d", newVg.vgId, newVg.replica);
  for (int32_t i = 0; i < newVg.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg.vgId, i, newVg.vnodeGid[i].dnodeId);
  }

  if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER;
  code = 0;

_OVER:
  mndTransDrop(pTrans);
  mndReleaseDb(pMnode, pDb);
  return code;
}

static int32_t mndProcessRedistributeVgroupMsg(SRpcMsg *pReq) {
1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597
  SMnode    *pMnode = pReq->info.node;
  SDnodeObj *pNew1 = NULL;
  SDnodeObj *pNew2 = NULL;
  SDnodeObj *pNew3 = NULL;
  SDnodeObj *pOld1 = NULL;
  SDnodeObj *pOld2 = NULL;
  SDnodeObj *pOld3 = NULL;
  SVgObj    *pVgroup = NULL;
  SDbObj    *pDb = NULL;
  int32_t    code = -1;
  int64_t    curMs = taosGetTimestampMs();
S
Shengliang Guan 已提交
1598 1599 1600 1601
  int32_t    newDnodeId[3] = {0};
  int32_t    oldDnodeId[3] = {0};
  int32_t    newIndex = -1;
  int32_t    oldIndex = -1;
1602

S
Shengliang Guan 已提交
1603 1604
  SRedistributeVgroupReq req = {0};
  if (tDeserializeSRedistributeVgroupReq(pReq->pCont, pReq->contLen, &req) != 0) {
S
Shengliang Guan 已提交
1605 1606 1607 1608
    terrno = TSDB_CODE_INVALID_MSG;
    goto _OVER;
  }

S
Shengliang Guan 已提交
1609
  mInfo("vgId:%d, start to redistribute vgroup to dnode %d:%d:%d", req.vgId, req.dnodeId1, req.dnodeId2, req.dnodeId3);
1610
  if (mndCheckOperPrivilege(pMnode, pReq->info.conn.user, MND_OPER_REDISTRIBUTE_VGROUP) != 0) {
S
Shengliang Guan 已提交
1611 1612
    goto _OVER;
  }
S
Shengliang Guan 已提交
1613

S
Shengliang Guan 已提交
1614
  pVgroup = mndAcquireVgroup(pMnode, req.vgId);
S
Shengliang Guan 已提交
1615 1616 1617 1618 1619 1620
  if (pVgroup == NULL) goto _OVER;

  pDb = mndAcquireDb(pMnode, pVgroup->dbName);
  if (pDb == NULL) goto _OVER;

  if (pVgroup->replica == 1) {
S
Shengliang Guan 已提交
1621
    if (req.dnodeId1 <= 0 || req.dnodeId2 > 0 || req.dnodeId3 > 0) {
1622 1623 1624
      terrno = TSDB_CODE_MND_INVALID_REPLICA;
      goto _OVER;
    }
S
Shengliang Guan 已提交
1625 1626

    if (req.dnodeId1 == pVgroup->vnodeGid[0].dnodeId) {
1627 1628
      // terrno = TSDB_CODE_MND_VGROUP_UN_CHANGED;
      code = 0;
S
Shengliang Guan 已提交
1629 1630
      goto _OVER;
    }
S
Shengliang Guan 已提交
1631 1632 1633 1634 1635

    pNew1 = mndAcquireDnode(pMnode, req.dnodeId1);
    if (pNew1 == NULL) goto _OVER;
    if (!mndIsDnodeOnline(pNew1, curMs)) {
      terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
S
Shengliang Guan 已提交
1636 1637
      goto _OVER;
    }
S
Shengliang Guan 已提交
1638 1639 1640 1641

    pOld1 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[0].dnodeId);
    if (pOld1 == NULL) goto _OVER;
    if (!mndIsDnodeOnline(pOld1, curMs)) {
1642 1643 1644
      terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
      goto _OVER;
    }
S
Shengliang Guan 已提交
1645

1646
    code = mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, NULL, NULL, NULL, NULL);
S
Shengliang Guan 已提交
1647

1648
  } else if (pVgroup->replica == 3) {
S
Shengliang Guan 已提交
1649
    if (req.dnodeId1 <= 0 || req.dnodeId2 <= 0 || req.dnodeId3 <= 0) {
1650 1651 1652
      terrno = TSDB_CODE_MND_INVALID_REPLICA;
      goto _OVER;
    }
S
Shengliang Guan 已提交
1653 1654

    if (req.dnodeId1 == req.dnodeId2 || req.dnodeId1 == req.dnodeId3 || req.dnodeId2 == req.dnodeId3) {
1655 1656 1657
      terrno = TSDB_CODE_MND_INVALID_REPLICA;
      goto _OVER;
    }
S
Shengliang Guan 已提交
1658 1659 1660 1661

    if (req.dnodeId1 != pVgroup->vnodeGid[0].dnodeId && req.dnodeId1 != pVgroup->vnodeGid[1].dnodeId &&
        req.dnodeId1 != pVgroup->vnodeGid[2].dnodeId) {
      newDnodeId[++newIndex] = req.dnodeId1;
S
Shengliang Guan 已提交
1662
      mInfo("vgId:%d, dnode:%d will be added, index:%d", pVgroup->vgId, newDnodeId[newIndex], newIndex);
S
Shengliang Guan 已提交
1663
    }
S
Shengliang Guan 已提交
1664 1665 1666 1667

    if (req.dnodeId2 != pVgroup->vnodeGid[0].dnodeId && req.dnodeId2 != pVgroup->vnodeGid[1].dnodeId &&
        req.dnodeId2 != pVgroup->vnodeGid[2].dnodeId) {
      newDnodeId[++newIndex] = req.dnodeId2;
S
Shengliang Guan 已提交
1668
      mInfo("vgId:%d, dnode:%d will be added, index:%d", pVgroup->vgId, newDnodeId[newIndex], newIndex);
S
Shengliang Guan 已提交
1669 1670 1671 1672 1673
    }

    if (req.dnodeId3 != pVgroup->vnodeGid[0].dnodeId && req.dnodeId3 != pVgroup->vnodeGid[1].dnodeId &&
        req.dnodeId3 != pVgroup->vnodeGid[2].dnodeId) {
      newDnodeId[++newIndex] = req.dnodeId3;
S
Shengliang Guan 已提交
1674
      mInfo("vgId:%d, dnode:%d will be added, index:%d", pVgroup->vgId, newDnodeId[newIndex], newIndex);
S
Shengliang Guan 已提交
1675 1676 1677 1678 1679
    }

    if (req.dnodeId1 != pVgroup->vnodeGid[0].dnodeId && req.dnodeId2 != pVgroup->vnodeGid[0].dnodeId &&
        req.dnodeId3 != pVgroup->vnodeGid[0].dnodeId) {
      oldDnodeId[++oldIndex] = pVgroup->vnodeGid[0].dnodeId;
S
Shengliang Guan 已提交
1680
      mInfo("vgId:%d, dnode:%d will be removed, index:%d", pVgroup->vgId, oldDnodeId[oldIndex], oldIndex);
S
Shengliang Guan 已提交
1681 1682 1683 1684 1685
    }

    if (req.dnodeId1 != pVgroup->vnodeGid[1].dnodeId && req.dnodeId2 != pVgroup->vnodeGid[1].dnodeId &&
        req.dnodeId3 != pVgroup->vnodeGid[1].dnodeId) {
      oldDnodeId[++oldIndex] = pVgroup->vnodeGid[1].dnodeId;
S
Shengliang Guan 已提交
1686
      mInfo("vgId:%d, dnode:%d will be removed, index:%d", pVgroup->vgId, oldDnodeId[oldIndex], oldIndex);
S
Shengliang Guan 已提交
1687 1688 1689 1690 1691
    }

    if (req.dnodeId1 != pVgroup->vnodeGid[2].dnodeId && req.dnodeId2 != pVgroup->vnodeGid[2].dnodeId &&
        req.dnodeId3 != pVgroup->vnodeGid[2].dnodeId) {
      oldDnodeId[++oldIndex] = pVgroup->vnodeGid[2].dnodeId;
S
Shengliang Guan 已提交
1692
      mInfo("vgId:%d, dnode:%d will be removed, index:%d", pVgroup->vgId, oldDnodeId[oldIndex], oldIndex);
S
Shengliang Guan 已提交
1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749
    }

    if (newDnodeId[0] != 0) {
      pNew1 = mndAcquireDnode(pMnode, newDnodeId[0]);
      if (pNew1 == NULL) goto _OVER;
      if (!mndIsDnodeOnline(pNew1, curMs)) {
        terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
        goto _OVER;
      }
    }

    if (newDnodeId[1] != 0) {
      pNew2 = mndAcquireDnode(pMnode, newDnodeId[1]);
      if (pNew2 == NULL) goto _OVER;
      if (!mndIsDnodeOnline(pNew2, curMs)) {
        terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
        goto _OVER;
      }
    }

    if (newDnodeId[2] != 0) {
      pNew3 = mndAcquireDnode(pMnode, newDnodeId[2]);
      if (pNew3 == NULL) goto _OVER;
      if (!mndIsDnodeOnline(pNew3, curMs)) {
        terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
        goto _OVER;
      }
    }

    if (oldDnodeId[0] != 0) {
      pOld1 = mndAcquireDnode(pMnode, oldDnodeId[0]);
      if (pOld1 == NULL) goto _OVER;
      if (!mndIsDnodeOnline(pOld1, curMs)) {
        terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
        goto _OVER;
      }
    }

    if (oldDnodeId[1] != 0) {
      pOld2 = mndAcquireDnode(pMnode, oldDnodeId[1]);
      if (pOld2 == NULL) goto _OVER;
      if (!mndIsDnodeOnline(pOld2, curMs)) {
        terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
        goto _OVER;
      }
    }

    if (oldDnodeId[2] != 0) {
      pOld3 = mndAcquireDnode(pMnode, oldDnodeId[2]);
      if (pOld3 == NULL) goto _OVER;
      if (!mndIsDnodeOnline(pOld3, curMs)) {
        terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
        goto _OVER;
      }
    }

    if (pNew1 == NULL && pOld1 == NULL && pNew2 == NULL && pOld2 == NULL && pNew3 == NULL && pOld3 == NULL) {
1750 1751
      // terrno = TSDB_CODE_MND_VGROUP_UN_CHANGED;
      code = 0;
1752 1753
      goto _OVER;
    }
S
Shengliang Guan 已提交
1754

1755
    code = mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, pNew2, pOld2, pNew3, pOld3);
S
Shengliang Guan 已提交
1756

1757 1758 1759
  } else {
    terrno = TSDB_CODE_MND_INVALID_REPLICA;
    goto _OVER;
S
Shengliang Guan 已提交
1760 1761 1762 1763 1764 1765
  }

  if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS;

_OVER:
  if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
S
Shengliang Guan 已提交
1766 1767
    mError("vgId:%d, failed to redistribute to dnode %d:%d:%d since %s", req.vgId, req.dnodeId1, req.dnodeId2,
           req.dnodeId3, terrstr());
S
Shengliang Guan 已提交
1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781
  }

  mndReleaseDnode(pMnode, pNew1);
  mndReleaseDnode(pMnode, pNew2);
  mndReleaseDnode(pMnode, pNew3);
  mndReleaseDnode(pMnode, pOld1);
  mndReleaseDnode(pMnode, pOld2);
  mndReleaseDnode(pMnode, pOld3);
  mndReleaseVgroup(pMnode, pVgroup);
  mndReleaseDb(pMnode, pDb);

  return code;
}

C
cadem 已提交
1782
// Build the message sent to a vnode to force it to become a follower.
//
// The returned buffer holds an SMsgHead (contLen and vgId in network byte
// order) followed by the serialized SForceBecomeFollowerReq for `pVgroup`.
// `pMnode` and `dnodeId` are not used by this function.
//
// On success returns a buffer allocated with taosMemoryMalloc() and stores
// its total length in *pContLen. On failure returns NULL with terrno set to
// TSDB_CODE_OUT_OF_MEMORY; *pContLen is left untouched.
static void *mndBuildSForceBecomeFollowerReq(SMnode *pMnode, SVgObj *pVgroup, int32_t dnodeId,
                                          int32_t *pContLen) {
  SForceBecomeFollowerReq balanceReq = {
      .vgId = pVgroup->vgId,
  };

  // First pass with a NULL buffer only computes the serialized length.
  int32_t bodyLen = tSerializeSForceBecomeFollowerReq(NULL, 0, &balanceReq);
  if (bodyLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }
  int32_t contLen = bodyLen + (int32_t)sizeof(SMsgHead);

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  SMsgHead *pHead = pReq;
  pHead->contLen = htonl(contLen);
  pHead->vgId = htonl(pVgroup->vgId);

  // Only bodyLen bytes are available behind the header, so pass that length
  // rather than the total, and do not hand out a half-written message.
  if (tSerializeSForceBecomeFollowerReq((char *)pReq + sizeof(SMsgHead), bodyLen, &balanceReq) < 0) {
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}

// Append a TDMT_SYNC_FORCE_FOLLOWER redo action for `pVgroup`, addressed to
// dnode `dnodeId`, to `pTrans`.
//
// Returns 0 on success; -1 when the dnode cannot be acquired, the request
// cannot be built, or the action cannot be appended (the request buffer is
// freed in that last case).
int32_t mndAddBalanceVgroupLeaderAction(SMnode *pMnode, STrans *pTrans, SVgObj *pVgroup, int32_t dnodeId) {
  SDnodeObj *pTarget = mndAcquireDnode(pMnode, dnodeId);
  if (pTarget == NULL) {
    return -1;
  }

  // The dnode is only needed for its endpoint set; release it right away.
  STransAction redoAction = {0};
  redoAction.epSet = mndGetDnodeEpset(pTarget);
  mndReleaseDnode(pMnode, pTarget);

  int32_t msgLen = 0;
  void   *pMsg = mndBuildSForceBecomeFollowerReq(pMnode, pVgroup, dnodeId, &msgLen);
  if (pMsg == NULL) {
    return -1;
  }

  redoAction.msgType = TDMT_SYNC_FORCE_FOLLOWER;
  redoAction.pCont = pMsg;
  redoAction.contLen = msgLen;

  if (mndTransAppendRedoAction(pTrans, &redoAction) == 0) {
    return 0;
  }

  taosMemoryFree(pMsg);
  return -1;
}

// Append the leader-balance steps for `pVgroup` to `pTrans`.
//
// The target dnode is the one whose replica reports TAOS_SYNC_STATE_LEADER;
// if no replica does, the dnode of the first replica is used. When that dnode
// exists and is online, a force-follower redo action and a commit log with
// the encoded vgroup are appended; otherwise the vgroup is skipped with an
// info log.
//
// Returns -1 when the vgroup has at most one replica or when appending
// fails; returns 0 otherwise, including the "dnode missing/offline" skip.
int32_t mndAddVgroupBalanceToTrans(SMnode *pMnode, SVgObj *pVgroup, STrans *pTrans) {
  int32_t vgid = pVgroup->vgId;
  int8_t  replica = pVgroup->replica;

  if (pVgroup->replica <= 1) {
    mInfo("trans:%d, vgid:%d no need to balance, replica:%d", pTrans->id, vgid, replica);
    return -1;
  }

  int32_t dnodeId = pVgroup->vnodeGid[0].dnodeId;

  for (int i = 0; i < replica; i++) {
    if (pVgroup->vnodeGid[i].syncState == TAOS_SYNC_STATE_LEADER) {
      dnodeId = pVgroup->vnodeGid[i].dnodeId;
      break;
    }
  }

  bool       exist = false;
  bool       online = false;
  int64_t    curMs = taosGetTimestampMs();
  SDnodeObj *pDnode = mndAcquireDnode(pMnode, dnodeId);
  if (pDnode != NULL) {
    exist = true;
    online = mndIsDnodeOnline(pDnode, curMs);
    mndReleaseDnode(pMnode, pDnode);
  }

  if (exist && online) {
    mInfo("trans:%d, vgid:%d leader to dnode:%d", pTrans->id, vgid, dnodeId);

    if (mndAddBalanceVgroupLeaderAction(pMnode, pTrans, pVgroup, dnodeId) != 0) {
      mError("trans:%d, vgid:%d failed to be balanced to dnode:%d", pTrans->id, vgid, dnodeId);
      return -1;
    }

    SSdbRaw *pRaw = mndVgroupActionEncode(pVgroup);
    if (pRaw == NULL) {
      mError("trans:%d, vgid:%d failed to encode action to dnode:%d", pTrans->id, vgid, dnodeId);
      return -1;
    }
    if (mndTransAppendCommitlog(pTrans, pRaw) != 0) {
      sdbFreeRaw(pRaw);
      mError("trans:%d, vgid:%d failed to append commit log dnode:%d", pTrans->id, vgid, dnodeId);
      return -1;
    }
    (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
  } else {
    mInfo("trans:%d, vgid:%d cant be balanced to dnode:%d, exist:%d, online:%d", pTrans->id, vgid, dnodeId, exist, online);
  }

  return 0;
}

C
cadem 已提交
1894
extern int32_t mndProcessVgroupBalanceLeaderMsgImp(SRpcMsg *pReq);
C
cadem 已提交
1895

C
cadem 已提交
1896 1897 1898
// Entry point for the balance-vgroup-leader request; the work is delegated
// to mndProcessVgroupBalanceLeaderMsgImp() and its result returned as is.
int32_t mndProcessVgroupBalanceLeaderMsg(SRpcMsg *pReq) {
  int32_t result = mndProcessVgroupBalanceLeaderMsgImp(pReq);
  return result;
}
C
cadem 已提交
1899

C
cadem 已提交
1900 1901 1902
#ifndef TD_ENTERPRISE
// Stub compiled only when TD_ENTERPRISE is not defined: the request is
// ignored and 0 is returned.
int32_t mndProcessVgroupBalanceLeaderMsgImp(SRpcMsg *pReq) {
  (void)pReq;
  return 0;
}
#endif
C
cadem 已提交
1905

1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930
// Re-account vgroup memory on every dnode in `pArray` and verify that each
// dnode still has memory left.
//
// For each dnode in the array: if it hosts a replica of `pOldVgroup`, the
// memory of the old vgroup (computed with `pOldDb`) is subtracted from its
// memUsed; if it hosts a replica of `pNewVgroup`, the memory of the new
// vgroup (computed with `pNewDb`) is added. The array elements are updated
// in place.
//
// Returns 0 when every dnode satisfies memAvail - memUsed > 0; otherwise
// returns -1 with terrno = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE at the first
// dnode that does not.
static int32_t mndCheckDnodeMemory(SMnode *pMnode, SDbObj *pOldDb, SDbObj *pNewDb, SVgObj *pOldVgroup,
                                   SVgObj *pNewVgroup, SArray *pArray) {
  for (int32_t i = 0; i < (int32_t)taosArrayGetSize(pArray); ++i) {
    SDnodeObj *pDnode = taosArrayGet(pArray, i);
    bool       inVgroup = false;

    // Replicas are indexed by j; indexing vnodeGid[] with the dnode-array
    // index i would compare against the wrong entry and can read past the
    // end of vnodeGid[] when the dnode array is longer than the replica array.
    for (int32_t j = 0; j < pOldVgroup->replica; ++j) {
      SVnodeGid *pVgId = &pOldVgroup->vnodeGid[j];
      if (pDnode->id == pVgId->dnodeId) {
        pDnode->memUsed -= mndGetVgroupMemory(pMnode, pOldDb, pOldVgroup);
        inVgroup = true;
      }
    }
    for (int32_t j = 0; j < pNewVgroup->replica; ++j) {
      SVnodeGid *pVgId = &pNewVgroup->vnodeGid[j];
      if (pDnode->id == pVgId->dnodeId) {
        pDnode->memUsed += mndGetVgroupMemory(pMnode, pNewDb, pNewVgroup);
        inVgroup = true;
      }
    }

    if (pDnode->memAvail - pDnode->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory in dnode:%d, avail:%" PRId64 " used:%" PRId64, pNewVgroup->dbName,
             pNewVgroup->vgId, pDnode->id, pDnode->memAvail, pDnode->memUsed);
      terrno = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
      return -1;
    }
    if (inVgroup) {
      mInfo("db:%s, vgId:%d, memory in dnode:%d, avail:%" PRId64 " used:%" PRId64, pNewVgroup->dbName, pNewVgroup->vgId,
            pDnode->id, pDnode->memAvail, pDnode->memUsed);
    }
  }
  return 0;
}
S
Shengliang Guan 已提交
1938

1939 1940
// Append to `pTrans` the actions needed to bring `pVgroup` in line with the
// settings of `pNewDb`.
//
// - Replica count unchanged (or vgroup replica <= 0): append an alter-config
//   action and run the dnode memory check; nothing else is done.
// - 1 -> 3 replicas: the transaction is made serial and two vnodes are added
//   one after the other, each followed by a confirm action.
// - 3 -> 1 replicas: the transaction is made serial and two vnodes are
//   removed one after the other, each followed by a confirm action.
// - Any other combination: the transaction is made serial and -1 is returned.
//
// For the two replica-changing cases the work is done on a local copy of the
// vgroup, which is finally appended as a commit log.
//
// Returns 0 on success, -1 on any failure.
int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pOldDb, SDbObj *pNewDb, SVgObj *pVgroup,
                                  SArray *pArray) {
  SVgObj vgCopy = {0};
  memcpy(&vgCopy, pVgroup, sizeof(SVgObj));

  if (pVgroup->replica <= 0 || pVgroup->replica == pNewDb->cfg.replications) {
    // Config-only change: no vnode is added or removed.
    if (mndAddAlterVnodeConfigAction(pMnode, pTrans, pNewDb, pVgroup) != 0) {
      return -1;
    }
    if (mndCheckDnodeMemory(pMnode, pOldDb, pNewDb, &vgCopy, pVgroup, pArray) != 0) {
      return -1;
    }
    return 0;
  }

  mndTransSetSerial(pTrans);

  if (vgCopy.replica == 1 && pNewDb->cfg.replications == 3) {
    mInfo("db:%s, vgId:%d, will add 2 vnodes, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId,
          pVgroup->vnodeGid[0].dnodeId);

    // Each round adds one vnode: the replicas that existed before the round
    // are altered, then the newly added one is created, then confirmed.
    for (int32_t existing = 1; existing <= 2; ++existing) {
      if (mndAddVnodeToVgroup(pMnode, pTrans, &vgCopy, pArray) != 0) return -1;
      for (int32_t k = 0; k < existing; ++k) {
        if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &vgCopy, vgCopy.vnodeGid[k].dnodeId) != 0) {
          return -1;
        }
      }
      if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &vgCopy, &vgCopy.vnodeGid[existing]) != 0) return -1;
      if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &vgCopy) != 0) return -1;
    }
  } else if (vgCopy.replica == 3 && pNewDb->cfg.replications == 1) {
    mInfo("db:%s, vgId:%d, will remove 2 vnodes, vn:0 dnode:%d vn:1 dnode:%d vn:2 dnode:%d", pVgroup->dbName,
          pVgroup->vgId, pVgroup->vnodeGid[0].dnodeId, pVgroup->vnodeGid[1].dnodeId, pVgroup->vnodeGid[2].dnodeId);

    // Each round removes one vnode: it is dropped, the `remaining` replicas
    // are altered, then the change is confirmed.
    for (int32_t remaining = 2; remaining >= 1; --remaining) {
      SVnodeGid removed = {0};
      if (mndRemoveVnodeFromVgroup(pMnode, pTrans, &vgCopy, pArray, &removed) != 0) return -1;
      if (mndAddDropVnodeAction(pMnode, pTrans, pNewDb, &vgCopy, &removed, true) != 0) return -1;
      for (int32_t k = 0; k < remaining; ++k) {
        if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &vgCopy, vgCopy.vnodeGid[k].dnodeId) != 0) {
          return -1;
        }
      }
      if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &vgCopy) != 0) return -1;
    }
  } else {
    return -1;
  }

  SSdbRaw *pCommitRaw = mndVgroupActionEncode(&vgCopy);
  if (pCommitRaw == NULL) {
    return -1;
  }
  if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) {
    sdbFreeRaw(pCommitRaw);
    return -1;
  }
  (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY);

  return 0;
}

// Placeholder: no action is appended and 0 is always returned.
static int32_t mndAddAdjustVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup) {
  (void)pMnode;
  (void)pTrans;
  (void)pDb;
  (void)pVgroup;
  return 0;
}

// Build and prepare a serial "split-vgroup" transaction that splits `pVgroup`
// into two single-replica vgroups, each covering one half of the hash range.
//
// Steps, in order:
//   1. Bring a copy of the vgroup to two replicas (add one when it has 1,
//      remove one when it has 3; any other replica count fails).
//   2. Disable writes on both replicas and confirm.
//   3. Derive newVg1 (first replica, lower half of the range) and newVg2
//      (second replica, upper half) and append hash-range actions that give
//      them the ids maxVgId and maxVgId + 1.
//   4. If the db's replica setting differs from 1, append alter actions for
//      both new vgroups.
//   5. Commit logs: both new vgroups as READY, the original as DROPPED, and a
//      copy of the db with vgVersion and numOfVgroups incremented.
//
// Returns 0 when the transaction was prepared, -1 otherwise.
static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) {
  int32_t  code = -1;
  STrans  *pTrans = NULL;
  SSdbRaw *pRaw = NULL;
  SDbObj   dbObj = {0};
  SArray  *pArray = mndBuildDnodesArray(pMnode, 0);

  pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "split-vgroup");
  if (pTrans == NULL) goto _OVER;
  mndTransSetSerial(pTrans);
  mInfo("trans:%d, used to split vgroup, vgId:%d", pTrans->id, pVgroup->vgId);

  SVgObj newVg1 = {0};
  memcpy(&newVg1, pVgroup, sizeof(SVgObj));
  mInfo("vgId:%d, vgroup info before split, replica:%d hashBegin:%u hashEnd:%u", newVg1.vgId, newVg1.replica,
        newVg1.hashBegin, newVg1.hashEnd);
  for (int32_t i = 0; i < newVg1.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg1.vgId, i, newVg1.vnodeGid[i].dnodeId);
  }

  if (newVg1.replica == 1) {
    if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg1, pArray) != 0) goto _OVER;
    if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[0].dnodeId) != 0) goto _OVER;
    if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg1, &newVg1.vnodeGid[1]) != 0) goto _OVER;
  } else if (newVg1.replica == 3) {
    SVnodeGid del1 = {0};
    if (mndRemoveVnodeFromVgroup(pMnode, pTrans, &newVg1, pArray, &del1) != 0) goto _OVER;
    if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVg1, &del1, true) != 0) goto _OVER;
    if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[0].dnodeId) != 0) goto _OVER;
    if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[1].dnodeId) != 0) goto _OVER;
  } else {
    goto _OVER;
  }

  for (int32_t i = 0; i < newVg1.replica; ++i) {
    if (mndAddDisableVnodeWriteAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[i].dnodeId) != 0) goto _OVER;
  }
  if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1) != 0) goto _OVER;

  // newVg1 keeps replica 0 and the lower half of the hash range; newVg2 takes
  // replica 1 and the upper half.
  SVgObj newVg2 = {0};
  memcpy(&newVg2, &newVg1, sizeof(SVgObj));
  newVg1.replica = 1;
  // Halve each bound before adding rather than adding first.
  newVg1.hashEnd = newVg1.hashBegin / 2 + newVg1.hashEnd / 2;
  memset(&newVg1.vnodeGid[1], 0, sizeof(SVnodeGid));

  newVg2.replica = 1;
  newVg2.hashBegin = newVg1.hashEnd + 1;
  memcpy(&newVg2.vnodeGid[0], &newVg2.vnodeGid[1], sizeof(SVnodeGid));
  memset(&newVg2.vnodeGid[1], 0, sizeof(SVnodeGid));

  mInfo("vgId:%d, vgroup info after split, replica:%d hashrange:[%u, %u] vnode:0 dnode:%d", newVg1.vgId, newVg1.replica,
        newVg1.hashBegin, newVg1.hashEnd, newVg1.vnodeGid[0].dnodeId);
  for (int32_t i = 0; i < newVg1.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg1.vgId, i, newVg1.vnodeGid[i].dnodeId);
  }
  mInfo("vgId:%d, vgroup info after split, replica:%d hashrange:[%u, %u] vnode:0 dnode:%d", newVg2.vgId, newVg2.replica,
        newVg2.hashBegin, newVg2.hashEnd, newVg2.vnodeGid[0].dnodeId);
  // Iterate over newVg2's own replica count (both counts are 1 here).
  for (int32_t i = 0; i < newVg2.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg2.vgId, i, newVg2.vnodeGid[i].dnodeId);
  }

  int32_t maxVgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP);
  if (mndAddAlterVnodeHashRangeAction(pMnode, pTrans, &newVg1, maxVgId) != 0) goto _OVER;
  newVg1.vgId = maxVgId;

  maxVgId++;
  if (mndAddAlterVnodeHashRangeAction(pMnode, pTrans, &newVg2, maxVgId) != 0) goto _OVER;
  newVg2.vgId = maxVgId;

  // adjust vgroup replica
  if (pDb->cfg.replications != newVg1.replica) {
    if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg1, pArray) != 0) goto _OVER;
  }
  if (pDb->cfg.replications != newVg2.replica) {
    if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg2, pArray) != 0) goto _OVER;
  }

  // pRaw is reset to NULL after each successful append so that the cleanup
  // below only frees a raw that was not appended.
  pRaw = mndVgroupActionEncode(&newVg1);
  if (pRaw == NULL) goto _OVER;
  if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER;
  (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
  pRaw = NULL;

  pRaw = mndVgroupActionEncode(&newVg2);
  if (pRaw == NULL) goto _OVER;
  if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER;
  (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
  pRaw = NULL;

  pRaw = mndVgroupActionEncode(pVgroup);
  if (pRaw == NULL) goto _OVER;
  if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER;
  (void)sdbSetRawStatus(pRaw, SDB_STATUS_DROPPED);
  pRaw = NULL;

  // Work on a copy of the db object; pRetensions is duplicated so that the
  // cleanup below destroys the copy's array, not the one referenced by pDb.
  memcpy(&dbObj, pDb, sizeof(SDbObj));
  if (dbObj.cfg.pRetensions != NULL) {
    dbObj.cfg.pRetensions = taosArrayDup(pDb->cfg.pRetensions, NULL);
    if (dbObj.cfg.pRetensions == NULL) goto _OVER;
  }
  dbObj.vgVersion++;
  dbObj.updateTime = taosGetTimestampMs();
  dbObj.cfg.numOfVgroups++;
  pRaw = mndDbActionEncode(&dbObj);
  if (pRaw == NULL) goto _OVER;
  if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER;
  (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY);
  pRaw = NULL;

  if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER;
  code = 0;

_OVER:
  taosArrayDestroy(pArray);
  mndTransDrop(pTrans);
  sdbFreeRaw(pRaw);
  taosArrayDestroy(dbObj.cfg.pRetensions);
  return code;
}

// Handle a split-vgroup request: decode it, check the MND_OPER_SPLIT_VGROUP
// privilege, acquire the vgroup and its db, and hand over to mndSplitVgroup().
//
// Returns TSDB_CODE_ACTION_IN_PROGRESS when the split transaction was
// started, otherwise the failing step's non-zero code (-1 for the steps
// performed here). The vgroup and db are released before returning.
static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) {
  SMnode         *pMnode = pReq->info.node;
  SVgObj         *pVg = NULL;
  SDbObj         *pDbObj = NULL;
  SSplitVgroupReq splitReq = {0};
  int32_t         result = -1;

  // Single-pass block: `break` leaves for the shared cleanup below.
  do {
    if (tDeserializeSSplitVgroupReq(pReq->pCont, pReq->contLen, &splitReq) != 0) {
      terrno = TSDB_CODE_INVALID_MSG;
      break;
    }

    mInfo("vgId:%d, start to split", splitReq.vgId);
    if (mndCheckOperPrivilege(pMnode, pReq->info.conn.user, MND_OPER_SPLIT_VGROUP) != 0) {
      break;
    }

    pVg = mndAcquireVgroup(pMnode, splitReq.vgId);
    if (pVg == NULL) {
      break;
    }

    pDbObj = mndAcquireDb(pMnode, pVg->dbName);
    if (pDbObj == NULL) {
      break;
    }

    result = mndSplitVgroup(pMnode, pReq, pDbObj, pVg);
    if (result == 0) {
      result = TSDB_CODE_ACTION_IN_PROGRESS;
    }
  } while (0);

  mndReleaseVgroup(pMnode, pVg);
  mndReleaseDb(pMnode, pDbObj);
  return result;
}
S
Shengliang Guan 已提交
2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172

/*
 * Append to pTrans the actions that move one replica of pVgroup from dnode
 * pSrc to dnode pDst, followed by a commit log carrying the resulting vgroup.
 *
 * The changes are applied to a local copy of *pVgroup; the object passed in
 * is only read. Returns 0 on success and -1 as soon as any step fails.
 */
static int32_t mndSetBalanceVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
                                              SDnodeObj *pSrc, SDnodeObj *pDst) {
  SVgObj   movedVg;
  SSdbRaw *pCommitRaw = NULL;
  int32_t  idx = 0;

  memcpy(&movedVg, pVgroup, sizeof(SVgObj));

  mInfo("vgId:%d, vgroup info before balance, replica:%d", movedVg.vgId, movedVg.replica);
  for (idx = 0; idx < movedVg.replica; ++idx) {
    mInfo("vgId:%d, vnode:%d dnode:%d", movedVg.vgId, idx, movedVg.vnodeGid[idx].dnodeId);
  }

  // Add the replica on the destination first, then drop the one on the source.
  if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &movedVg, pDst->id) != 0) {
    return -1;
  }
  if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &movedVg, pSrc->id) != 0) {
    return -1;
  }

  pCommitRaw = mndVgroupActionEncode(&movedVg);
  if (pCommitRaw == NULL) {
    return -1;
  }
  if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) {
    // The raw is freed here only when appending it to the transaction failed.
    sdbFreeRaw(pCommitRaw);
    return -1;
  }
  (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY);

  mInfo("vgId:%d, vgroup info after balance, replica:%d", movedVg.vgId, movedVg.replica);
  for (idx = 0; idx < movedVg.replica; ++idx) {
    mInfo("vgId:%d, vnode:%d dnode:%d", movedVg.vgId, idx, movedVg.vnodeGid[idx].dnodeId);
  }

  return 0;
}

S
Shengliang Guan 已提交
2190 2191
/*
 * Pick one vgroup that has a replica on pSrc but none on pDst, and append the
 * actions that move that replica to pDst into pTrans.
 *
 * Vgroups whose id is already present in pBalancedVgroups are skipped; a
 * vgroup that is moved successfully is recorded there.
 *
 * Only the first eligible vgroup is processed. Returns 0 when it was added to
 * the transaction and recorded, a non-zero code when that failed, and -1 when
 * no eligible vgroup exists or its database cannot be acquired.
 */
static int32_t mndBalanceVgroupBetweenDnode(SMnode *pMnode, STrans *pTrans, SDnodeObj *pSrc, SDnodeObj *pDst,
                                            SHashObj *pBalancedVgroups) {
  void   *pIter = NULL;
  int32_t code = -1;
  SSdb   *pSdb = pMnode->pSdb;

  while (1) {
    SVgObj *pVgroup = NULL;
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;

    // Already handled by an earlier call that shares this hash.
    if (taosHashGet(pBalancedVgroups, &pVgroup->vgId, sizeof(int32_t)) != NULL) {
      sdbRelease(pSdb, pVgroup);
      continue;
    }

    bool existInSrc = false;
    bool existInDst = false;
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
      SVnodeGid *pGid = &pVgroup->vnodeGid[i];
      if (pGid->dnodeId == pSrc->id) existInSrc = true;
      if (pGid->dnodeId == pDst->id) existInDst = true;
    }

    // Eligible only when the source holds a replica and the destination does not.
    if (!existInSrc || existInDst) {
      sdbRelease(pSdb, pVgroup);
      continue;
    }

    SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
    if (pDb == NULL) {
      // Do not hand a NULL database to the transaction builder; fail this move.
      mError("vgId:%d, failed to balance vgroup since %s", pVgroup->vgId, terrstr());
      code = -1;
    } else {
      code = mndSetBalanceVgroupInfoToTrans(pMnode, pTrans, pDb, pVgroup, pSrc, pDst);
      if (code == 0) {
        code = taosHashPut(pBalancedVgroups, &pVgroup->vgId, sizeof(int32_t), &pVgroup->vgId, sizeof(int32_t));
      }
    }

    mndReleaseDb(pMnode, pDb);
    sdbRelease(pSdb, pVgroup);
    // The iteration stops before sdbFetch returned NULL, so cancel it explicitly.
    sdbCancelFetch(pSdb, pIter);
    break;
  }

  return code;
}

/*
 * Build and submit one "balance-vgroup" transaction that repeatedly moves a
 * vnode from the last dnode of the sorted array to the first one.
 *
 * pArray holds SDnodeObj entries and is re-sorted with mndCompareDnodeVnodes
 * on every round; the numOfVnodes counters of its entries are adjusted after
 * each successful move. The caller in this file only invokes this function
 * when the array has at least two entries.
 *
 * Returns 0 when nothing had to be moved, TSDB_CODE_ACTION_IN_PROGRESS when
 * the transaction was prepared, and -1 on failure.
 */
static int32_t mndBalanceVgroup(SMnode *pMnode, SRpcMsg *pReq, SArray *pArray) {
  int32_t   code = -1;
  int32_t   numOfVgroups = 0;
  STrans   *pTrans = NULL;
  SHashObj *pBalancedVgroups = NULL;

  pBalancedVgroups = taosHashInit(16, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  if (pBalancedVgroups == NULL) goto _OVER;

  pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "balance-vgroup");
  if (pTrans == NULL) goto _OVER;
  mndTransSetSerial(pTrans);
  mInfo("trans:%d, used to balance vgroup", pTrans->id);

  while (1) {
    taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
    for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) {
      SDnodeObj *pDnode = taosArrayGet(pArray, i);
      // Arguments follow the order of the labels: vnodes, others, support.
      mInfo("dnode:%d, equivalent vnodes:%d others:%d support:%d, score:%f", pDnode->id, pDnode->numOfVnodes,
            pDnode->numOfOtherNodes, pDnode->numOfSupportVnodes, mndGetDnodeScore(pDnode, 0, 1));
    }

    // After sorting, the move goes from the last entry to the first entry.
    SDnodeObj *pSrc = taosArrayGet(pArray, taosArrayGetSize(pArray) - 1);
    SDnodeObj *pDst = taosArrayGet(pArray, 0);

    // Scores as they would be after moving one vnode from pSrc to pDst.
    float srcScore = mndGetDnodeScore(pSrc, -1, 1);
    float dstScore = mndGetDnodeScore(pDst, 1, 1);
    mInfo("trans:%d, after balance, src dnode:%d score:%f, dst dnode:%d score:%f", pTrans->id, pSrc->id, srcScore,
          pDst->id, dstScore);

    if (srcScore > dstScore - 0.000001) {
      code = mndBalanceVgroupBetweenDnode(pMnode, pTrans, pSrc, pDst, pBalancedVgroups);
      if (code == 0) {
        pSrc->numOfVnodes--;
        pDst->numOfVnodes++;
        numOfVgroups++;
        continue;
      } else {
        mInfo("trans:%d, no vgroup need to balance from dnode:%d to dnode:%d", pTrans->id, pSrc->id, pDst->id);
        break;
      }
    } else {
      mInfo("trans:%d, no vgroup need to balance any more", pTrans->id);
      break;
    }
  }

  if (numOfVgroups <= 0) {
    mInfo("no need to balance vgroup");
    code = 0;
  } else {
    mInfo("start to balance vgroup, numOfVgroups:%d", numOfVgroups);
    if (mndTransPrepare(pMnode, pTrans) != 0) {
      // code may still hold 0 from the last successful move; do not report success here.
      code = -1;
      goto _OVER;
    }
    code = TSDB_CODE_ACTION_IN_PROGRESS;
  }

_OVER:
  taosHashCleanup(pBalancedVgroups);
  mndTransDrop(pTrans);
  return code;
}

/*
 * Handle a balance-vgroup request message.
 *
 * Deserializes the request, checks the MND_OPER_BALANCE_VGROUP privilege,
 * refuses to proceed when any dnode is offline, then builds the dnode array
 * and delegates to mndBalanceVgroup() when it has at least two entries.
 *
 * Returns 0 when there is nothing to balance, the result of
 * mndBalanceVgroup() otherwise, and -1 when an earlier step fails.
 */
static int32_t mndProcessBalanceVgroupMsg(SRpcMsg *pReq) {
  SMnode *pMnode = pReq->info.node;
  int32_t code = -1;
  SArray *pArray = NULL;
  void   *pIter = NULL;
  int64_t curMs = taosGetTimestampMs();

  SBalanceVgroupReq req = {0};
  if (tDeserializeSBalanceVgroupReq(pReq->pCont, pReq->contLen, &req) != 0) {
    terrno = TSDB_CODE_INVALID_MSG;
    goto _OVER;
  }

  mInfo("start to balance vgroup");
  if (mndCheckOperPrivilege(pMnode, pReq->info.conn.user, MND_OPER_BALANCE_VGROUP) != 0) {
    goto _OVER;
  }

  // Every dnode must be online before any vnode is moved.
  while (1) {
    SDnodeObj *pDnode = NULL;
    pIter = sdbFetch(pMnode->pSdb, SDB_DNODE, pIter, (void **)&pDnode);
    if (pIter == NULL) break;
    if (!mndIsDnodeOnline(pDnode, curMs)) {
      terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
      mError("failed to balance vgroup since %s, dnode:%d", terrstr(), pDnode->id);
      sdbRelease(pMnode->pSdb, pDnode);
      // The iteration is abandoned before sdbFetch returned NULL, so cancel it
      // explicitly, as the other early-exit fetch loop in this file does.
      sdbCancelFetch(pMnode->pSdb, pIter);
      goto _OVER;
    }

    sdbRelease(pMnode->pSdb, pDnode);
  }

  pArray = mndBuildDnodesArray(pMnode, 0);
  if (pArray == NULL) goto _OVER;

  if (taosArrayGetSize(pArray) < 2) {
    mInfo("no need to balance vgroup since dnode num less than 2");
    code = 0;
  } else {
    code = mndBalanceVgroup(pMnode, pReq, pArray);
  }

_OVER:
  if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
    mError("failed to balance vgroup since %s", terrstr());
  }

  taosArrayDestroy(pArray);
  return code;
}

L
Liu Jicong 已提交
2345
/* Return true when pVgroup is not flagged isTsma and its dbUid equals dbUid. */
bool mndVgroupInDb(SVgObj *pVgroup, int64_t dbUid) {
  if (pVgroup->isTsma) {
    return false;
  }
  return pVgroup->dbUid == dbUid;
}
2346

H
Hongze Cheng 已提交
2347 2348
/*
 * Build the message sent to a vnode to start a compaction.
 *
 * The message is an SMsgHead (contLen and vgId stored with htonl) followed by
 * the serialized SCompactVnodeReq filled from pDb, compactTs and tw.
 *
 * On success returns a buffer allocated with taosMemoryMalloc and stores its
 * total length in *pContLen; the caller is responsible for the buffer.
 * On failure returns NULL with terrno set, and *pContLen is left untouched.
 */
static void *mndBuildCompactVnodeReq(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen, int64_t compactTs,
                                     STimeWindow tw) {
  SCompactVnodeReq compactReq = {0};
  compactReq.dbUid = pDb->uid;
  compactReq.compactStartTime = compactTs;
  compactReq.tw = tw;
  tstrncpy(compactReq.db, pDb->name, TSDB_DB_FNAME_LEN);

  mInfo("vgId:%d, build compact vnode config req", pVgroup->vgId);

  // First pass with a NULL buffer only measures the serialized payload.
  int32_t bodyLen = tSerializeSCompactVnodeReq(NULL, 0, &compactReq);
  if (bodyLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }
  int32_t contLen = bodyLen + (int32_t)sizeof(SMsgHead);

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  SMsgHead *pHead = pReq;
  pHead->contLen = htonl(contLen);
  pHead->vgId = htonl(pVgroup->vgId);

  // Only bodyLen bytes remain after the header, so that is the capacity passed
  // to the serializer. A failure here must not yield a half-built message.
  if (tSerializeSCompactVnodeReq((char *)pReq + sizeof(SMsgHead), bodyLen, &compactReq) < 0) {
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;  // same code as the sizing-pass failure above
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}

H
Hongze Cheng 已提交
2378 2379
/*
 * Append a TDMT_VND_COMPACT redo action for pVgroup to pTrans.
 *
 * The action targets the epset returned by mndGetVgroupEpset() and carries
 * the message built by mndBuildCompactVnodeReq(). Returns 0 on success and
 * -1 when the message cannot be built or the action cannot be appended.
 */
static int32_t mndAddCompactVnodeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int64_t compactTs,
                                        STimeWindow tw) {
  STransAction compactAction = {0};
  int32_t      reqLen = 0;
  void        *pCompactReq = NULL;

  compactAction.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  pCompactReq = mndBuildCompactVnodeReq(pMnode, pDb, pVgroup, &reqLen, compactTs, tw);
  if (pCompactReq == NULL) {
    return -1;
  }

  compactAction.pCont = pCompactReq;
  compactAction.contLen = reqLen;
  compactAction.msgType = TDMT_VND_COMPACT;

  if (mndTransAppendRedoAction(pTrans, &compactAction) == 0) {
    return 0;
  }

  // The message is freed here only when appending the action failed.
  taosMemoryFree(pCompactReq);
  return -1;
}

H
Hongze Cheng 已提交
2399 2400 2401
/*
 * Add the compact action for pVgroup to pTrans by delegating to
 * mndAddCompactVnodeAction(). Returns 0 on success and -1 on any failure.
 */
int32_t mndBuildCompactVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int64_t compactTs,
                                    STimeWindow tw) {
  int32_t rc = mndAddCompactVnodeAction(pMnode, pTrans, pDb, pVgroup, compactTs, tw);
  return (rc != 0) ? -1 : 0;
}