/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
#include "dndMnode.h"
#include "dndDnode.h"
#include "dndTransport.h"

static int32_t dndInitMnodeReadWorker(SDnode *pDnode);
static int32_t dndInitMnodeWriteWorker(SDnode *pDnode);
static int32_t dndInitMnodeSyncWorker(SDnode *pDnode);
static int32_t dndInitMnodeMgmtWorker(SDnode *pDnode);
static void    dndCleanupMnodeReadWorker(SDnode *pDnode);
static void    dndCleanupMnodeWriteWorker(SDnode *pDnode);
static void    dndCleanupMnodeSyncWorker(SDnode *pDnode);
static void    dndCleanupMnodeMgmtWorker(SDnode *pDnode);
static int32_t dndAllocMnodeReadQueue(SDnode *pDnode);
static int32_t dndAllocMnodeWriteQueue(SDnode *pDnode);
static int32_t dndAllocMnodeApplyQueue(SDnode *pDnode);
static int32_t dndAllocMnodeSyncQueue(SDnode *pDnode);
static int32_t dndAllocMnodeMgmtQueue(SDnode *pDnode);
static void    dndFreeMnodeReadQueue(SDnode *pDnode);
static void    dndFreeMnodeWriteQueue(SDnode *pDnode);
static void    dndFreeMnodeApplyQueue(SDnode *pDnode);
static void    dndFreeMnodeSyncQueue(SDnode *pDnode);
static void    dndFreeMnodeMgmtQueue(SDnode *pDnode);

static void    dndProcessMnodeReadQueue(SDnode *pDnode, SMnodeMsg *pMsg);
static void    dndProcessMnodeWriteQueue(SDnode *pDnode, SMnodeMsg *pMsg);
static void    dndProcessMnodeApplyQueue(SDnode *pDnode, SMnodeMsg *pMsg);
static void    dndProcessMnodeSyncQueue(SDnode *pDnode, SMnodeMsg *pMsg);
static void    dndProcessMnodeMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg);
static int32_t dndWriteMnodeMsgToQueue(SMnode *pMnode, taos_queue pQueue, SRpcMsg *pRpcMsg);
void           dndProcessMnodeReadMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void           dndProcessMnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void           dndProcessMnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void           dndProcessMnodeMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
static int32_t dndPutMsgIntoMnodeApplyQueue(SDnode *pDnode, SMnodeMsg *pMsg);

static int32_t dndStartMnodeWorker(SDnode *pDnode);
static void    dndStopMnodeWorker(SDnode *pDnode);

static SMnode *dndAcquireMnode(SDnode *pDnode);
static void    dndReleaseMnode(SDnode *pDnode, SMnode *pMnode);

static int32_t dndReadMnodeFile(SDnode *pDnode);
static int32_t dndWriteMnodeFile(SDnode *pDnode);

static int32_t dndOpenMnode(SDnode *pDnode, SMnodeOpt *pOption);
static int32_t dndAlterMnode(SDnode *pDnode, SMnodeOpt *pOption);
static int32_t dndDropMnode(SDnode *pDnode);

static int32_t dndProcessCreateMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg);
static int32_t dndProcessAlterMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg);
static int32_t dndProcessDropMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg);

/*
 * Take a reference on the local mnode.
 *
 * Under the read latch: when the mnode is deployed and not dropped, the
 * reference counter is incremented and the stored handle is returned.
 * Otherwise terrno is set to TSDB_CODE_DND_MNODE_NOT_DEPLOYED and NULL is
 * returned.
 */
static SMnode *dndAcquireMnode(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  SMnode     *pResult = NULL;
  int32_t     refs = 0;

  taosRLockLatch(&pMgmt->latch);
  if (!pMgmt->deployed || pMgmt->dropped) {
    terrno = TSDB_CODE_DND_MNODE_NOT_DEPLOYED;
  } else {
    refs = atomic_add_fetch_32(&pMgmt->refCount, 1);
    pResult = pMgmt->pMnode;
  }
  taosRUnLockLatch(&pMgmt->latch);

  if (pResult == NULL) {
    return NULL;
  }

  dTrace("acquire mnode, refCount:%d", refs);
  return pResult;
}

/*
 * Drop a reference previously obtained with dndAcquireMnode().
 *
 * Passing pMnode == NULL is allowed: the counter is left untouched and
 * nothing is traced.
 */
static void dndReleaseMnode(SDnode *pDnode, SMnode *pMnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  bool        held = (pMnode != NULL);
  int32_t     refs = 0;

  taosRLockLatch(&pMgmt->latch);
  if (held) {
    refs = atomic_sub_fetch_32(&pMgmt->refCount, 1);
  }
  taosRUnLockLatch(&pMgmt->latch);

  if (held) {
    dTrace("release mnode, refCount:%d", refs);
  }
}

/*
 * Load the mnode deployment state from pMgmt->file (a JSON document).
 *
 * On success fills pMgmt->deployed, pMgmt->dropped, pMgmt->replica and
 * pMgmt->replicas[]. A file that cannot be opened is treated as "nothing
 * deployed yet": the function returns 0 without touching those fields.
 *
 * Returns 0 on success, otherwise an error code; the same value is stored
 * in terrno before returning.
 */
static int32_t dndReadMnodeFile(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  int32_t     code = TSDB_CODE_DND_MNODE_READ_FILE_ERROR;
  int32_t     len = 0;
  int32_t     maxLen = 4096;
  char       *content = NULL;
  cJSON      *root = NULL;
  FILE       *fp = NULL;

  // The buffer is handed straight to fread(), so it must be valid.
  content = calloc(1, maxLen + 1);
  if (content == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    dError("failed to read %s since out of memory", pMgmt->file);
    goto PARSE_MNODE_OVER;
  }

  fp = fopen(pMgmt->file, "r");
  if (fp == NULL) {
    dDebug("file %s not exist", pMgmt->file);
    code = 0;
    goto PARSE_MNODE_OVER;
  }

  len = (int32_t)fread(content, 1, maxLen, fp);
  if (len <= 0) {
    dError("failed to read %s since content is null", pMgmt->file);
    goto PARSE_MNODE_OVER;
  }

  content[len] = 0;
  root = cJSON_Parse(content);
  if (root == NULL) {
    dError("failed to read %s since invalid json format", pMgmt->file);
    goto PARSE_MNODE_OVER;
  }

  // All scalar values are stored as JSON strings and converted with atoi().
  cJSON *deployed = cJSON_GetObjectItem(root, "deployed");
  if (!deployed || deployed->type != cJSON_String || deployed->valuestring == NULL) {
    dError("failed to read %s since deployed not found", pMgmt->file);
    goto PARSE_MNODE_OVER;
  }
  pMgmt->deployed = atoi(deployed->valuestring);

  cJSON *dropped = cJSON_GetObjectItem(root, "dropped");
  if (!dropped || dropped->type != cJSON_String || dropped->valuestring == NULL) {
    dError("failed to read %s since dropped not found", pMgmt->file);
    goto PARSE_MNODE_OVER;
  }
  pMgmt->dropped = atoi(dropped->valuestring);

  cJSON *nodes = cJSON_GetObjectItem(root, "nodes");
  if (!nodes || nodes->type != cJSON_Array) {
    dError("failed to read %s since nodes not found", pMgmt->file);
    goto PARSE_MNODE_OVER;
  }

  pMgmt->replica = cJSON_GetArraySize(nodes);
  if (pMgmt->replica <= 0 || pMgmt->replica > TSDB_MAX_REPLICA) {
    dError("failed to read %s since nodes size %d invalid", pMgmt->file, pMgmt->replica);
    goto PARSE_MNODE_OVER;
  }

  for (int32_t i = 0; i < pMgmt->replica; ++i) {
    cJSON *node = cJSON_GetArrayItem(nodes, i);
    if (node == NULL) {
      // A hole in the array would leave replicas[i..] unset; do not report success.
      dError("failed to read %s since node %d not found", pMgmt->file, i);
      goto PARSE_MNODE_OVER;
    }

    SReplica *pReplica = &pMgmt->replicas[i];

    cJSON *id = cJSON_GetObjectItem(node, "id");
    if (!id || id->type != cJSON_String || id->valuestring == NULL) {
      dError("failed to read %s since id not found", pMgmt->file);
      goto PARSE_MNODE_OVER;
    }
    pReplica->id = atoi(id->valuestring);

    cJSON *fqdn = cJSON_GetObjectItem(node, "fqdn");
    if (!fqdn || fqdn->type != cJSON_String || fqdn->valuestring == NULL) {
      dError("failed to read %s since fqdn not found", pMgmt->file);
      goto PARSE_MNODE_OVER;
    }
    tstrncpy(pReplica->fqdn, fqdn->valuestring, TSDB_FQDN_LEN);

    cJSON *port = cJSON_GetObjectItem(node, "port");
    if (!port || port->type != cJSON_String || port->valuestring == NULL) {
      dError("failed to read %s since port not found", pMgmt->file);
      goto PARSE_MNODE_OVER;
    }
    pReplica->port = atoi(port->valuestring);
  }

  code = 0;
  dInfo("succcessed to read file %s", pMgmt->file);

PARSE_MNODE_OVER:
  free(content);
  if (root != NULL) cJSON_Delete(root);
  if (fp != NULL) fclose(fp);

  terrno = code;
  return code;
}

/*
 * Persist the mnode deployment state (deployed, dropped, replica list) as JSON.
 *
 * The document is first written to "<pMgmt->file>.bak", synced, and then
 * renamed over pMgmt->file, so the destination is only replaced once the new
 * content has been written in full.
 *
 * Returns 0 on success; -1 with terrno set on failure.
 */
static int32_t dndWriteMnodeFile(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  char file[PATH_MAX + 20] = {0};
  snprintf(file, sizeof(file), "%s.bak", pMgmt->file);

  // Allocate before opening the file so an allocation failure cannot leak fp.
  int32_t maxLen = 4096;
  char   *content = calloc(1, maxLen + 1);
  if (content == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    dError("failed to write %s since %s", file, terrstr());
    return -1;
  }

  FILE *fp = fopen(file, "w");
  if (fp == NULL) {
    free(content);
    terrno = TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR;
    dError("failed to write %s since %s", file, terrstr());
    return -1;
  }

  int32_t len = 0;
  len += snprintf(content + len, maxLen - len, "{\n");
  len += snprintf(content + len, maxLen - len, "  \"deployed\": \"%d\",\n", pMgmt->deployed);
  len += snprintf(content + len, maxLen - len, "  \"dropped\": \"%d\",\n", pMgmt->dropped);
  len += snprintf(content + len, maxLen - len, "  \"nodes\": [{\n");
  for (int32_t i = 0; i < pMgmt->replica; ++i) {
    SReplica *pReplica = &pMgmt->replicas[i];
    len += snprintf(content + len, maxLen - len, "    \"id\": \"%d\",\n", pReplica->id);
    len += snprintf(content + len, maxLen - len, "    \"fqdn\": \"%s\",\n", pReplica->fqdn);
    len += snprintf(content + len, maxLen - len, "    \"port\": \"%u\"\n", pReplica->port);
    if (i < pMgmt->replica - 1) {
      len += snprintf(content + len, maxLen - len, "  },{\n");
    } else {
      len += snprintf(content + len, maxLen - len, "  }]\n");
    }
  }
  len += snprintf(content + len, maxLen - len, "}\n");

  // A short write must not be renamed over the existing, valid file.
  size_t written = fwrite(content, 1, len, fp);
  free(content);
  if (written != (size_t)len) {
    fclose(fp);
    terrno = TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR;
    dError("failed to write %s since %s", file, terrstr());
    return -1;
  }

  taosFsyncFile(fileno(fp));
  fclose(fp);

  if (taosRenameFile(file, pMgmt->file) != 0) {
    terrno = TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR;
    dError("failed to rename %s since %s", pMgmt->file, terrstr());
    return -1;
  }

  dInfo("successed to write %s", pMgmt->file);
  return 0;
}

/*
 * Bring up the read, write and sync worker pools and then allocate the read,
 * write, apply and sync queues, in that order.
 *
 * Stops at the first failing step, logs it and returns -1; returns 0 when
 * every step succeeded.
 */
static int32_t dndStartMnodeWorker(SDnode *pDnode) {
  int32_t code = -1;

  // Worker pools first.
  if (dndInitMnodeReadWorker(pDnode) != 0) {
    dError("failed to start mnode read worker since %s", terrstr());
    goto START_WORKER_OVER;
  }
  if (dndInitMnodeWriteWorker(pDnode) != 0) {
    dError("failed to start mnode write worker since %s", terrstr());
    goto START_WORKER_OVER;
  }
  if (dndInitMnodeSyncWorker(pDnode) != 0) {
    dError("failed to start mnode sync worker since %s", terrstr());
    goto START_WORKER_OVER;
  }

  // Then the queues served by those pools.
  if (dndAllocMnodeReadQueue(pDnode) != 0) {
    dError("failed to alloc mnode read queue since %s", terrstr());
    goto START_WORKER_OVER;
  }
  if (dndAllocMnodeWriteQueue(pDnode) != 0) {
    dError("failed to alloc mnode write queue since %s", terrstr());
    goto START_WORKER_OVER;
  }
  if (dndAllocMnodeApplyQueue(pDnode) != 0) {
    dError("failed to alloc mnode apply queue since %s", terrstr());
    goto START_WORKER_OVER;
  }
  if (dndAllocMnodeSyncQueue(pDnode) != 0) {
    dError("failed to alloc mnode sync queue since %s", terrstr());
    goto START_WORKER_OVER;
  }

  code = 0;

START_WORKER_OVER:
  return code;
}

/*
 * Unpublish the mnode, wait until it is idle, then tear down the read, write
 * and sync worker pools and free the read, write, apply and sync queues.
 */
static void dndStopMnodeWorker(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  // Clearing these under the write latch makes dndAcquireMnode() fail from now on.
  taosWLockLatch(&pMgmt->latch);
  pMgmt->deployed = 0;
  pMgmt->pMnode = NULL;
  taosWUnLockLatch(&pMgmt->latch);

  // Poll in 10 ms steps until at most one reference is left and all queues are empty.
  while (pMgmt->refCount > 1) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pMgmt->pReadQ)) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pMgmt->pApplyQ)) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pMgmt->pWriteQ)) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pMgmt->pSyncQ)) {
    taosMsleep(10);
  }

  dndCleanupMnodeReadWorker(pDnode);
  dndCleanupMnodeWriteWorker(pDnode);
  dndCleanupMnodeSyncWorker(pDnode);

  dndFreeMnodeReadQueue(pDnode);
  dndFreeMnodeWriteQueue(pDnode);
  dndFreeMnodeApplyQueue(pDnode);
  dndFreeMnodeSyncQueue(pDnode);
}

/*
 * Decide whether this dnode should deploy an mnode on its own.
 *
 * True only when the dnode has neither a dnode id nor a cluster id yet
 * (both <= 0) and its local endpoint equals the configured first endpoint.
 */
static bool dndNeedDeployMnode(SDnode *pDnode) {
  if (dndGetDnodeId(pDnode) > 0 || dndGetClusterId(pDnode) > 0) {
    return false;
  }

  return strcmp(pDnode->opt.localEp, pDnode->opt.firstEp) == 0;
}

/*
 * Fill the parts of an SMnodeOpt that do not depend on the replica set:
 * the owning dnode, the callbacks, the ids, and the settings copied from
 * pDnode->opt.
 */
static void dndInitMnodeOption(SDnode *pDnode, SMnodeOpt *pOption) {
  // Owner and identity.
  pOption->pDnode = pDnode;
  pOption->dnodeId = dndGetDnodeId(pDnode);
  pOption->clusterId = dndGetClusterId(pDnode);

  // Callbacks stored for the mnode to use.
  pOption->sendMsgToDnodeFp = dndSendMsgToDnode;
  pOption->sendMsgToMnodeFp = dndSendMsgToMnode;
  pOption->sendRedirectMsgFp = dndSendRedirectMsg;
  pOption->putMsgToApplyMsgFp = dndPutMsgIntoMnodeApplyQueue;

  // Settings taken over from the dnode options.
  pOption->sver = pDnode->opt.sver;
  pOption->statusInterval = pDnode->opt.statusInterval;
  pOption->mnodeEqualVnodeNum = pDnode->opt.mnodeEqualVnodeNum;
  pOption->timezone = pDnode->opt.timezone;
  pOption->charset = pDnode->opt.charset;
  pOption->locale = pDnode->opt.locale;
}

/*
 * Build the options for a first-time, single-replica deployment: this dnode
 * becomes replica 0 with id 1, using its own fqdn and server port. The
 * resulting replica set is mirrored into pDnode->mmgmt.
 */
static void dndBuildMnodeDeployOption(SDnode *pDnode, SMnodeOpt *pOption) {
  dndInitMnodeOption(pDnode, pOption);

  SReplica *pSelf = &pOption->replicas[0];
  pSelf->id = 1;
  pSelf->port = pDnode->opt.serverPort;
  tstrncpy(pSelf->fqdn, pDnode->opt.localFqdn, TSDB_FQDN_LEN);

  pOption->replica = 1;
  pOption->selfIndex = 0;

  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  pMgmt->replica = pOption->replica;
  pMgmt->selfIndex = pOption->selfIndex;
  memcpy(&pMgmt->replicas, pOption->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
}

/*
 * Build the options for opening an mnode whose replica set is already held
 * in pDnode->mmgmt: copy selfIndex, replica and replicas[] into pOption.
 */
static void dndBuildMnodeOpenOption(SDnode *pDnode, SMnodeOpt *pOption) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  dndInitMnodeOption(pDnode, pOption);

  pOption->replica = pMgmt->replica;
  pOption->selfIndex = pMgmt->selfIndex;
  memcpy(&pOption->replicas, pMgmt->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
}

/*
 * Build mnode options from a create/alter request.
 *
 * Copies the replica list from pMsg into pOption, determines which entry is
 * this dnode (selfIndex) by matching replica ids against the local dnode id,
 * and mirrors the result into pDnode->mmgmt.
 *
 * Returns 0 on success. Returns -1 with terrno set when the replica count is
 * out of range or when this dnode is not part of the replica list; in both
 * cases pDnode->mmgmt is left unchanged.
 */
static int32_t dndBuildMnodeOptionFromMsg(SDnode *pDnode, SMnodeOpt *pOption, SCreateMnodeMsg *pMsg) {
  // Also fills pOption->dnodeId and pOption->clusterId.
  dndInitMnodeOption(pDnode, pOption);

  // The count comes from the request and is used to index pOption->replicas[],
  // which the memcpy below treats as TSDB_MAX_REPLICA entries long.
  // A count of zero previously ended in the "id not found" error as well, so
  // the same code is reused here for every out-of-range count.
  if (pMsg->replica <= 0 || pMsg->replica > TSDB_MAX_REPLICA) {
    terrno = TSDB_CODE_DND_MNODE_ID_NOT_FOUND;
    dError("failed to build mnode options since replica %d invalid", (int32_t)pMsg->replica);
    return -1;
  }

  pOption->replica = pMsg->replica;
  pOption->selfIndex = -1;
  for (int32_t i = 0; i < pMsg->replica; ++i) {
    SReplica *pReplica = &pOption->replicas[i];
    pReplica->id = pMsg->replicas[i].id;
    pReplica->port = pMsg->replicas[i].port;
    tstrncpy(pReplica->fqdn, pMsg->replicas[i].fqdn, TSDB_FQDN_LEN);
    if (pReplica->id == pOption->dnodeId) {
      pOption->selfIndex = i;
    }
  }

  if (pOption->selfIndex == -1) {
    terrno = TSDB_CODE_DND_MNODE_ID_NOT_FOUND;
    dError("failed to build mnode options since %s", terrstr());
    return -1;
  }

  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  pMgmt->selfIndex = pOption->selfIndex;
  pMgmt->replica = pOption->replica;
  memcpy(&pMgmt->replicas, pOption->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
  return 0;
}

/*
 * Start the mnode workers, open the mnode and persist the deployment state.
 *
 * The mnode is published (pMnode set, deployed = 1) BEFORE mnode.json is
 * written: dndWriteMnodeFile() serialises pMgmt->deployed, so writing first
 * would persist "deployed": "0" and a restarted dnode would not reopen its
 * mnode. If the write fails, dndStopMnodeWorker() unpublishes the mnode again
 * before it is closed and destroyed.
 *
 * Returns 0 on success, a non-zero code on failure (terrno holds the cause).
 */
static int32_t dndOpenMnode(SDnode *pDnode, SMnodeOpt *pOption) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  int32_t code = dndStartMnodeWorker(pDnode);
  if (code != 0) {
    dError("failed to start mnode worker since %s", terrstr());
    return code;
  }

  SMnode *pMnode = mndOpen(pDnode->dir.mnode, pOption);
  if (pMnode == NULL) {
    dError("failed to open mnode since %s", terrstr());
    code = terrno;
    dndStopMnodeWorker(pDnode);
    terrno = code;  // restore: the cleanup above may have overwritten it
    return code;
  }

  taosWLockLatch(&pMgmt->latch);
  pMgmt->pMnode = pMnode;
  pMgmt->deployed = 1;
  taosWUnLockLatch(&pMgmt->latch);

  if (dndWriteMnodeFile(pDnode) != 0) {
    dError("failed to write mnode file since %s", terrstr());
    code = terrno;
    dndStopMnodeWorker(pDnode);  // clears deployed/pMnode and waits for users
    mndClose(pMnode);
    mndDestroy(pDnode->dir.mnode);
    terrno = code;
    return code;
  }

  return 0;
}

/*
 * Apply new options to the running mnode via mndAlter().
 * Returns 0 on success, -1 when the mnode is unavailable or mndAlter() fails.
 */
static int32_t dndAlterMnode(SDnode *pDnode, SMnodeOpt *pOption) {
  SMnode *pMnode = dndAcquireMnode(pDnode);
  if (pMnode == NULL) {
    dError("failed to alter mnode since %s", terrstr());
    return -1;
  }

  int32_t code = 0;
  if (mndAlter(pMnode, pOption) != 0) {
    dError("failed to alter mnode since %s", terrstr());
    code = -1;
  }

  dndReleaseMnode(pDnode, pMnode);
  return code;
}

/*
 * Drop the local mnode.
 *
 * Steps: mark it dropped and persist that; stop the workers (which also clears
 * deployed/pMnode); persist the final state; close the mnode and destroy its
 * data directory.
 *
 * Returns 0 on success; -1 when the mnode is not available or the dropped
 * flag could not be persisted (the flag is then rolled back).
 */
static int32_t dndDropMnode(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  SMnode *pMnode = dndAcquireMnode(pDnode);
  if (pMnode == NULL) {
    dError("failed to drop mnode since %s", terrstr());
    return -1;
  }

  // 'dropped' is read under the read latch in dndAcquireMnode(), so writers
  // must hold the write latch.
  taosWLockLatch(&pMgmt->latch);
  pMgmt->dropped = 1;
  taosWUnLockLatch(&pMgmt->latch);

  if (dndWriteMnodeFile(pDnode) != 0) {
    taosWLockLatch(&pMgmt->latch);
    pMgmt->dropped = 0;
    taosWUnLockLatch(&pMgmt->latch);

    dndReleaseMnode(pDnode, pMnode);
    dError("failed to drop mnode since %s", terrstr());
    return -1;
  }

  // Give the reference back on the success path as well; otherwise refCount
  // never returns to its previous value. The handle stays valid because it is
  // only closed below.
  dndReleaseMnode(pDnode, pMnode);

  dndStopMnodeWorker(pDnode);

  // Best effort: record deployed = 0. 'dropped' is already on disk, so a
  // failure here is logged but does not fail the drop.
  if (dndWriteMnodeFile(pDnode) != 0) {
    dError("failed to write mnode file since %s", terrstr());
  }

  mndClose(pMnode);
  mndDestroy(pDnode->dir.mnode);

  return 0;
}

/*
 * Convert the payload of a create-mnode request from network to host byte
 * order, in place.
 *
 * pRpcMsg is the RPC message itself; its pCont is interpreted as an
 * SCreateMnodeMsg. Returns that payload pointer.
 */
static SCreateMnodeMsg *dndParseCreateMnodeMsg(SRpcMsg *pRpcMsg) {
  SCreateMnodeMsg *pMsg = pRpcMsg->pCont;
  pMsg->dnodeId = ntohl(pMsg->dnodeId);

  // The count comes off the wire: never walk past the replica array.
  // NOTE(review): assumes pMsg->replicas holds TSDB_MAX_REPLICA entries - confirm
  // against the SCreateMnodeMsg definition.
  int32_t numOfReplicas = pMsg->replica;
  if (numOfReplicas > TSDB_MAX_REPLICA) {
    numOfReplicas = TSDB_MAX_REPLICA;
  }

  for (int32_t i = 0; i < numOfReplicas; ++i) {
    pMsg->replicas[i].id = ntohl(pMsg->replicas[i].id);
    pMsg->replicas[i].port = ntohs(pMsg->replicas[i].port);
  }

  return pMsg;
}

/*
 * Handle a create-mnode request: validate the target dnode id, build the
 * mnode options from the message and open the mnode.
 *
 * Returns 0 on success, non-zero on failure with terrno set.
 */
static int32_t dndProcessCreateMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) {
  // The parser takes the RPC message and reads ->pCont itself; passing
  // pRpcMsg->pCont would make it dereference the payload as an SRpcMsg.
  SCreateMnodeMsg *pMsg = dndParseCreateMnodeMsg(pRpcMsg);

  if (pMsg->dnodeId != dndGetDnodeId(pDnode)) {
    terrno = TSDB_CODE_DND_MNODE_ID_INVALID;
    return -1;
  }

  SMnodeOpt option = {0};
  if (dndBuildMnodeOptionFromMsg(pDnode, &option, pMsg) != 0) {
    return -1;
  }

  return dndOpenMnode(pDnode, &option);
}

/*
 * Handle an alter-mnode request: validate the target dnode id, build the
 * mnode options from the message and apply them to the running mnode.
 *
 * Returns 0 on success, -1 on failure with terrno set.
 */
static int32_t dndProcessAlterMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) {
  // The parser takes the RPC message and reads ->pCont itself; passing
  // pRpcMsg->pCont would make it dereference the payload as an SRpcMsg.
  SAlterMnodeMsg *pMsg = dndParseCreateMnodeMsg(pRpcMsg);

  if (pMsg->dnodeId != dndGetDnodeId(pDnode)) {
    terrno = TSDB_CODE_DND_MNODE_ID_INVALID;
    return -1;
  }

  SMnodeOpt option = {0};
  if (dndBuildMnodeOptionFromMsg(pDnode, &option, pMsg) != 0) {
    return -1;
  }

  return dndAlterMnode(pDnode, &option);
}

/*
 * Handle a drop-mnode request: validate the target dnode id and drop the
 * local mnode.
 *
 * Returns 0 on success, -1 on failure with terrno set.
 */
static int32_t dndProcessDropMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) {
  // Only dnodeId is used from a drop request, so convert just that field
  // instead of running the create-message parser over a different struct.
  // NOTE(review): assumes SDropMnodeMsg.dnodeId is a 32-bit field - confirm.
  SDropMnodeMsg *pMsg = pRpcMsg->pCont;
  pMsg->dnodeId = ntohl(pMsg->dnodeId);

  if (pMsg->dnodeId != dndGetDnodeId(pDnode)) {
    terrno = TSDB_CODE_DND_MNODE_ID_INVALID;
    return -1;
  }

  return dndDropMnode(pDnode);
}

/*
 * Queue handler for mnode management requests (create / alter / drop).
 *
 * Dispatches on the message type, sends the response and then releases the
 * message content and the queue item.
 */
static void dndProcessMnodeMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg) {
  int32_t code = 0;

  switch (pMsg->msgType) {
    case TSDB_MSG_TYPE_CREATE_MNODE_IN:
      code = dndProcessCreateMnodeReq(pDnode, pMsg);
      break;
    case TSDB_MSG_TYPE_ALTER_MNODE_IN:
      code = dndProcessAlterMnodeReq(pDnode, pMsg);
      break;
    case TSDB_MSG_TYPE_DROP_MNODE_IN:
      code = dndProcessDropMnodeReq(pDnode, pMsg);
      break;
    default:
      terrno = TSDB_CODE_MSG_NOT_PROCESSED;
      code = -1;
      break;
  }

  // The handlers report failure as a bare -1 and leave the reason in terrno;
  // the peer needs the actual error code.
  if (code != 0) {
    code = terrno;
  }

  SRpcMsg rsp = {.code = code, .handle = pMsg->handle};
  rpcSendResponse(&rsp);
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
}

/*
 * Read-queue handler: hand the message to mndProcessReadMsg() while holding a
 * reference on the mnode, or answer with terrno when the mnode is unavailable.
 * The message is cleaned up in both cases.
 */
static void dndProcessMnodeReadQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode == NULL) {
    mndSendRsp(pMsg, terrno);
  } else {
    mndProcessReadMsg(pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  mndCleanupMsg(pMsg);
}

/*
 * Write-queue handler: hand the message to mndProcessWriteMsg() while holding
 * a reference on the mnode, or answer with terrno when the mnode is
 * unavailable. The message is cleaned up in both cases.
 */
static void dndProcessMnodeWriteQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode == NULL) {
    mndSendRsp(pMsg, terrno);
  } else {
    mndProcessWriteMsg(pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  mndCleanupMsg(pMsg);
}

/*
 * Apply-queue handler: hand the message to mndProcessApplyMsg() while holding
 * a reference on the mnode, or answer with terrno when the mnode is
 * unavailable. The message is cleaned up in both cases.
 */
static void dndProcessMnodeApplyQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode == NULL) {
    mndSendRsp(pMsg, terrno);
  } else {
    mndProcessApplyMsg(pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  mndCleanupMsg(pMsg);
}

/*
 * Sync-queue handler: hand the message to mndProcessSyncMsg() while holding a
 * reference on the mnode, or answer with terrno when the mnode is unavailable.
 * The message is cleaned up in both cases.
 */
static void dndProcessMnodeSyncQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode == NULL) {
    mndSendRsp(pMsg, terrno);
  } else {
    mndProcessSyncMsg(pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  mndCleanupMsg(pMsg);
}

/*
 * Wrap an RPC message into an SMnodeMsg and append it to pQueue.
 *
 * Returns 0 on success. When the wrapper cannot be created or cannot be
 * queued, returns -1 with terrno = TSDB_CODE_OUT_OF_MEMORY; a wrapper that
 * was created is cleaned up first.
 */
static int32_t dndWriteMnodeMsgToQueue(SMnode *pMnode, taos_queue pQueue, SRpcMsg *pRpcMsg) {
  assert(pQueue);

  SMnodeMsg *pMsg = mndInitMsg(pMnode, pRpcMsg);
  if (pMsg != NULL) {
    if (taosWriteQitem(pQueue, pMsg) == 0) {
      return 0;
    }
    mndCleanupMsg(pMsg);
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/*
 * Entry point for mnode management requests arriving from the transport:
 * copy the RPC message into a queue item and hand it to the mgmt queue.
 *
 * No mnode reference is taken here. The previous code acquired one, never
 * used it and never released it.
 *
 * When the item cannot be allocated or queued, the caller is answered with
 * TSDB_CODE_OUT_OF_MEMORY and the message content is released.
 */
void dndProcessMnodeMgmtMsg(SDnode *pDnode, SRpcMsg *pRpcMsg, SEpSet *pEpSet) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg));
  if (pMsg != NULL) {
    // The queue handler reads msgType, handle and pCont from the item.
    *pMsg = *pRpcMsg;
    if (taosWriteQitem(pMgmt->pMgmtQ, pMsg) == 0) {
      return;
    }
    taosFreeQitem(pMsg);
  }

  SRpcMsg rsp = {.handle = pRpcMsg->handle, .code = TSDB_CODE_OUT_OF_MEMORY};
  rpcSendResponse(&rsp);
  rpcFreeCont(pRpcMsg->pCont);
}

/*
 * Entry point for mnode write requests: queue the message on the write queue.
 * When the mnode is unavailable or queuing fails, answer with terrno and
 * release the message content.
 */
void dndProcessMnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  SMnode     *pMnode = dndAcquireMnode(pDnode);

  bool queued = (pMnode != NULL) && (dndWriteMnodeMsgToQueue(pMnode, pMgmt->pWriteQ, pMsg) == 0);
  if (!queued) {
    SRpcMsg rsp = {.handle = pMsg->handle, .code = terrno};
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
  }

  // Safe for NULL as well: dndReleaseMnode() ignores a NULL handle.
  dndReleaseMnode(pDnode, pMnode);
}

/*
 * Entry point for mnode sync requests: queue the message on the sync queue.
 * When the mnode is unavailable or queuing fails, answer with terrno and
 * release the message content.
 */
void dndProcessMnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  SMnode     *pMnode = dndAcquireMnode(pDnode);

  bool queued = (pMnode != NULL) && (dndWriteMnodeMsgToQueue(pMnode, pMgmt->pSyncQ, pMsg) == 0);
  if (!queued) {
    SRpcMsg rsp = {.handle = pMsg->handle, .code = terrno};
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
  }

  // Safe for NULL as well: dndReleaseMnode() ignores a NULL handle.
  dndReleaseMnode(pDnode, pMnode);
}

/*
 * Entry point for mnode read requests: queue the message on the READ queue.
 * (It was previously queued on the sync queue, so read requests were handled
 * by the sync handler.)
 *
 * When the mnode is unavailable or queuing fails, answer with terrno and
 * release the message content.
 */
void dndProcessMnodeReadMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  SMnode     *pMnode = dndAcquireMnode(pDnode);
  if (pMnode == NULL || dndWriteMnodeMsgToQueue(pMnode, pMgmt->pReadQ, pMsg) != 0) {
    SRpcMsg rsp = {.handle = pMsg->handle, .code = terrno};
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
  }

  // Safe for NULL as well: dndReleaseMnode() ignores a NULL handle.
  dndReleaseMnode(pDnode, pMnode);
}

/*
 * Append an already-built mnode message to the apply queue.
 * Returns -1 when the mnode is unavailable, otherwise the result of
 * taosWriteQitem().
 */
static int32_t dndPutMsgIntoMnodeApplyQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  int32_t     code = -1;

  SMnode *pMnode = dndAcquireMnode(pDnode);
  if (pMnode != NULL) {
    code = taosWriteQitem(pMgmt->pApplyQ, pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  return code;
}

/*
 * Allocate the mgmt queue from the mgmt worker pool, with
 * dndProcessMnodeMgmtQueue as its item handler.
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndAllocMnodeMgmtQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pMgmtQ = tWorkerAllocQueue(&pMgmt->mgmtPool, pDnode, (FProcessItem)dndProcessMnodeMgmtQueue);
  if (pMgmt->pMgmtQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Return the mgmt queue to the mgmt worker pool and clear the stored handle. */
static void dndFreeMnodeMgmtQueue(SDnode *pDnode) {
  SMnodeMgmt  *pMgmt = &pDnode->mmgmt;
  SWorkerPool *pPool = &pMgmt->mgmtPool;

  tWorkerFreeQueue(pPool, pMgmt->pMgmtQ);
  pMgmt->pMgmtQ = NULL;
}

/*
 * Configure and start the "mnode-mgmt" worker pool (min 1, max 1).
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndInitMnodeMgmtWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.mgmtPool;

  pPool->name = "mnode-mgmt";
  pPool->min = 1;
  pPool->max = 1;

  if (tWorkerInit(pPool) == 0) {
    dDebug("mnode mgmt worker is initialized");
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Shut down the mgmt worker pool. */
static void dndCleanupMnodeMgmtWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.mgmtPool;

  tWorkerCleanup(pPool);
  dDebug("mnode mgmt worker is closed");
}

/*
 * Allocate the read queue from the read worker pool, with
 * dndProcessMnodeReadQueue as its item handler.
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndAllocMnodeReadQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pReadQ = tWorkerAllocQueue(&pMgmt->readPool, pDnode, (FProcessItem)dndProcessMnodeReadQueue);
  if (pMgmt->pReadQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Return the read queue to the read worker pool and clear the stored handle. */
static void dndFreeMnodeReadQueue(SDnode *pDnode) {
  SMnodeMgmt  *pMgmt = &pDnode->mmgmt;
  SWorkerPool *pPool = &pMgmt->readPool;

  tWorkerFreeQueue(pPool, pMgmt->pReadQ);
  pMgmt->pReadQ = NULL;
}

/*
 * Configure and start the "mnode-read" worker pool (min 0, max 1).
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndInitMnodeReadWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.readPool;

  pPool->name = "mnode-read";
  pPool->min = 0;
  pPool->max = 1;

  if (tWorkerInit(pPool) == 0) {
    dDebug("mnode read worker is initialized");
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Shut down the read worker pool. */
static void dndCleanupMnodeReadWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.readPool;

  tWorkerCleanup(pPool);
  dDebug("mnode read worker is closed");
}

/*
 * Allocate the write queue from the write worker pool, with
 * dndProcessMnodeWriteQueue as its item handler.
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndAllocMnodeWriteQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pWriteQ = tWorkerAllocQueue(&pMgmt->writePool, pDnode, (FProcessItem)dndProcessMnodeWriteQueue);
  if (pMgmt->pWriteQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Return the write queue to the write worker pool and clear the stored handle. */
static void dndFreeMnodeWriteQueue(SDnode *pDnode) {
  SMnodeMgmt  *pMgmt = &pDnode->mmgmt;
  SWorkerPool *pPool = &pMgmt->writePool;

  tWorkerFreeQueue(pPool, pMgmt->pWriteQ);
  pMgmt->pWriteQ = NULL;
}

/*
 * Allocate the apply queue with dndProcessMnodeApplyQueue as its item
 * handler. The apply queue is taken from the write worker pool.
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndAllocMnodeApplyQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pApplyQ = tWorkerAllocQueue(&pMgmt->writePool, pDnode, (FProcessItem)dndProcessMnodeApplyQueue);
  if (pMgmt->pApplyQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Return the apply queue to the write worker pool and clear the stored handle. */
static void dndFreeMnodeApplyQueue(SDnode *pDnode) {
  SMnodeMgmt  *pMgmt = &pDnode->mmgmt;
  SWorkerPool *pPool = &pMgmt->writePool;

  tWorkerFreeQueue(pPool, pMgmt->pApplyQ);
  pMgmt->pApplyQ = NULL;
}

/*
 * Configure and start the "mnode-write" worker pool (min 0, max 1).
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndInitMnodeWriteWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.writePool;

  pPool->name = "mnode-write";
  pPool->min = 0;
  pPool->max = 1;

  if (tWorkerInit(pPool) == 0) {
    dDebug("mnode write worker is initialized");
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Shut down the write worker pool. */
static void dndCleanupMnodeWriteWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.writePool;

  tWorkerCleanup(pPool);
  dDebug("mnode write worker is closed");
}

/*
 * Allocate the sync queue from the sync worker pool, with
 * dndProcessMnodeSyncQueue as its item handler.
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndAllocMnodeSyncQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pSyncQ = tWorkerAllocQueue(&pMgmt->syncPool, pDnode, (FProcessItem)dndProcessMnodeSyncQueue);
  if (pMgmt->pSyncQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Return the sync queue to the sync worker pool and clear the stored handle. */
static void dndFreeMnodeSyncQueue(SDnode *pDnode) {
  SMnodeMgmt  *pMgmt = &pDnode->mmgmt;
  SWorkerPool *pPool = &pMgmt->syncPool;

  tWorkerFreeQueue(pPool, pMgmt->pSyncQ);
  pMgmt->pSyncQ = NULL;
}

/*
 * Configure and start the "mnode-sync" worker pool (min 0, max 1).
 * Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
 */
static int32_t dndInitMnodeSyncWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.syncPool;

  pPool->name = "mnode-sync";
  pPool->min = 0;
  pPool->max = 1;

  if (tWorkerInit(pPool) == 0) {
    dDebug("mnode sync worker is initialized");
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

/* Shut down the sync worker pool. */
static void dndCleanupMnodeSyncWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.syncPool;

  tWorkerCleanup(pPool);
  dDebug("mnode sync worker is closed");
}

/*
 * Initialise mnode management for this dnode.
 *
 * Starts the mgmt worker and its queue, loads <dnode dir>/mnode.json, and then:
 *   - if the mnode is marked dropped, destroys its data and returns;
 *   - if it is not deployed, deploys a single-replica mnode when
 *     dndNeedDeployMnode() says so, otherwise does nothing;
 *   - if it is deployed, reopens it with the stored replica set.
 *
 * Returns 0 on success, non-zero on failure with terrno set.
 */
int32_t dndInitMnode(SDnode *pDnode) {
  dInfo("dnode-mnode start to init");
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  taosInitRWLatch(&pMgmt->latch);

  if (dndInitMnodeMgmtWorker(pDnode) != 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  // dndProcessMnodeMgmtMsg() writes into pMgmtQ, so the queue must exist
  // before any management request can arrive.
  if (dndAllocMnodeMgmtQueue(pDnode) != 0) {
    dError("failed to alloc mnode mgmt queue since %s", terrstr());
    return -1;
  }

  char path[PATH_MAX];
  snprintf(path, PATH_MAX, "%s/mnode.json", pDnode->dir.dnode);
  pMgmt->file = strdup(path);
  if (pMgmt->file == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  if (dndReadMnodeFile(pDnode) != 0) {
    return -1;
  }

  if (pMgmt->dropped) {
    dInfo("mnode has been deployed and needs to be deleted");
    mndDestroy(pDnode->dir.mnode);
    return 0;
  }

  SMnodeOpt option = {0};
  if (!pMgmt->deployed) {
    if (!dndNeedDeployMnode(pDnode)) {
      dDebug("mnode does not need to be deployed");
      return 0;
    }

    dInfo("start to deploy mnode");
    dndBuildMnodeDeployOption(pDnode, &option);
  } else {
    dInfo("start to open mnode");
    dndBuildMnodeOpenOption(pDnode, &option);
  }

  return dndOpenMnode(pDnode, &option);
}

/*
 * Shut down mnode management for this dnode.
 *
 * The read/write/sync workers and queues are created only when an mnode is
 * opened, so they are stopped only when one is currently published. The mnode
 * handle is closed afterwards; its on-disk data is kept.
 */
void dndCleanupMnode(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  dInfo("dnode-mnode start to clean up");

  // dndStopMnodeWorker() clears pMgmt->pMnode, so remember the handle first.
  SMnode *pMnode = pMgmt->pMnode;
  if (pMnode != NULL) {
    dndStopMnodeWorker(pDnode);
    mndClose(pMnode);
  }

  dndCleanupMnodeMgmtWorker(pDnode);
  if (pMgmt->pMgmtQ != NULL) {
    dndFreeMnodeMgmtQueue(pDnode);
  }

  tfree(pMgmt->file);
  dInfo("dnode-mnode is cleaned up");
}

/*
 * Look up authentication data for `user` through the local mnode.
 *
 * Returns -1 with terrno = TSDB_CODE_APP_NOT_READY when the mnode is not
 * available; otherwise returns whatever mndRetriveAuth() returns.
 */
int32_t dndGetUserAuthFromMnode(SDnode *pDnode, char *user, char *spi, char *encrypt, char *secret, char *ckey) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode != NULL) {
    int32_t code = mndRetriveAuth(pMnode, user, spi, encrypt, secret, ckey);
    dndReleaseMnode(pDnode, pMnode);
    return code;
  }

  terrno = TSDB_CODE_APP_NOT_READY;
  dTrace("failed to get user auth since %s", terrstr());
  return -1;
}