dnodeMgmt.c 24.5 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

S
slguan 已提交
16
#define _DEFAULT_SOURCE
17
#include "os.h"
J
jtao1735 已提交
18
#include "cJSON.h"
S
slguan 已提交
19 20
#include "taoserror.h"
#include "taosmsg.h"
J
jtao1735 已提交
21 22
#include "ttime.h"
#include "ttimer.h"
S
slguan 已提交
23
#include "tsdb.h"
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
24
#include "twal.h"
25
#include "tqueue.h"
J
jtao1735 已提交
26 27 28 29
#include "tsync.h"
#include "ttime.h"
#include "ttimer.h"
#include "tbalance.h"
S
slguan 已提交
30
#include "tglobal.h"
J
jtao1735 已提交
31 32 33
#include "dnode.h"
#include "vnode.h"
#include "mnode.h"
34
#include "dnodeInt.h"
35
#include "dnodeMgmt.h"
36 37
#include "dnodeVRead.h"
#include "dnodeVWrite.h"
J
jtao1735 已提交
38 39 40 41
#include "dnodeModule.h"

#define MPEER_CONTENT_LEN 2000

42 43 44 45 46 47 48 49 50
typedef struct {
  pthread_t thread;
  int32_t   threadIndex;
  int32_t   failed;
  int32_t   opened;
  int32_t   vnodeNum;
  int32_t * vnodeList;
} SOpenVnodeThread;

51 52 53 54
void *          tsDnodeTmr = NULL;
static void *   tsStatusTimer = NULL;
static uint32_t tsRebootTime;

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
55
static SRpcIpSet     tsDMnodeIpSet = {0};
56 57
static SDMMnodeInfos tsDMnodeInfos = {0};
static SDMDnodeCfg   tsDnodeCfg = {0};
58 59 60
static taos_qset     tsMgmtQset = NULL;
static taos_queue    tsMgmtQueue = NULL;
static pthread_t     tsQthread;
61

J
jtao1735 已提交
62 63 64 65 66 67 68 69
static void   dnodeUpdateMnodeInfos(SDMMnodeInfos *pMnodes);
static bool   dnodeReadMnodeInfos();
static void   dnodeSaveMnodeInfos();
static void   dnodeUpdateDnodeCfg(SDMDnodeCfg *pCfg);
static bool   dnodeReadDnodeCfg();
static void   dnodeSaveDnodeCfg();
static void   dnodeProcessStatusRsp(SRpcMsg *pMsg);
static void   dnodeSendStatusMsg(void *handle, void *tmrId);
70
static void  *dnodeProcessMgmtQueue(void *param);
J
jtao1735 已提交
71

S
slguan 已提交
72
static int32_t  dnodeOpenVnodes();
73
static void     dnodeCloseVnodes();
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
74 75 76 77 78
static int32_t  dnodeProcessCreateVnodeMsg(SRpcMsg *pMsg);
static int32_t  dnodeProcessDropVnodeMsg(SRpcMsg *pMsg);
static int32_t  dnodeProcessAlterStreamMsg(SRpcMsg *pMsg);
static int32_t  dnodeProcessConfigDnodeMsg(SRpcMsg *pMsg);
static int32_t (*dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *pMsg);
S
slguan 已提交
79 80

int32_t dnodeInitMgmt() {
S
slguan 已提交
81 82
  dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = dnodeProcessCreateVnodeMsg;
  dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_VNODE]   = dnodeProcessDropVnodeMsg;
S
slguan 已提交
83 84
  dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = dnodeProcessAlterStreamMsg;
  dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CONFIG_DNODE] = dnodeProcessConfigDnodeMsg;
S
slguan 已提交
85

J
jtao1735 已提交
86 87 88 89 90
  dnodeAddClientRspHandle(TSDB_MSG_TYPE_DM_STATUS_RSP,  dnodeProcessStatusRsp);
  dnodeReadDnodeCfg();
  tsRebootTime = taosGetTimestampSec();

  if (!dnodeReadMnodeInfos()) {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
91
    memset(&tsDMnodeIpSet, 0, sizeof(SRpcIpSet));
92 93
    memset(&tsDMnodeInfos, 0, sizeof(SDMMnodeInfos));

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
94 95
    tsDMnodeIpSet.numOfIps = 1;
    taosGetFqdnPortFromEp(tsFirst, tsDMnodeIpSet.fqdn[0], &tsDMnodeIpSet.port[0]);
96
    
J
jtao1735 已提交
97
    if (strcmp(tsSecond, tsFirst) != 0) {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
98 99
      tsDMnodeIpSet.numOfIps = 2;
      taosGetFqdnPortFromEp(tsSecond, tsDMnodeIpSet.fqdn[1], &tsDMnodeIpSet.port[1]);
J
jtao1735 已提交
100 101
    }
  } else {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
102 103
    tsDMnodeIpSet.inUse = tsDMnodeInfos.inUse;
    tsDMnodeIpSet.numOfIps = tsDMnodeInfos.nodeNum;
104
    for (int32_t i = 0; i < tsDMnodeInfos.nodeNum; i++) {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
105
      taosGetFqdnPortFromEp(tsDMnodeInfos.nodeInfos[i].nodeEp, tsDMnodeIpSet.fqdn[i], &tsDMnodeIpSet.port[i]);
J
jtao1735 已提交
106 107 108
    }
  }

109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
  // create the queue and thread to handle the message 
  tsMgmtQset = taosOpenQset();
  if (tsMgmtQset == NULL) {
    dError("failed to create the mgmt queue set");
    dnodeCleanupMgmt();
    return -1;
  }

  tsMgmtQueue = taosOpenQueue();
  if (tsMgmtQueue == NULL) {
    dError("failed to create the mgmt queue");
    dnodeCleanupMgmt();
    return -1;
  }

  taosAddIntoQset(tsMgmtQset, tsMgmtQueue, NULL);

  pthread_attr_t thAttr;
  pthread_attr_init(&thAttr);
  pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE);

  int32_t code = pthread_create(&tsQthread, &thAttr, dnodeProcessMgmtQueue, NULL);
  pthread_attr_destroy(&thAttr);
  if (code != 0) {
    dError("failed to create thread to process mgmt queue, reason:%s", strerror(errno));
    dnodeCleanupMgmt();
    return -1; 
  }

S
Shengliang Guan 已提交
138
  code = vnodeInitResources();
S
Shengliang Guan 已提交
139 140 141 142 143
  if (code != TSDB_CODE_SUCCESS) {
    dnodeCleanupMgmt();
    return -1;
  }

144
  code = dnodeOpenVnodes();
S
[TD-17]  
slguan 已提交
145
  if (code != TSDB_CODE_SUCCESS) {
146 147 148 149 150 151 152 153
    dnodeCleanupMgmt();
    return -1;
  }

  tsDnodeTmr = taosTmrInit(100, 200, 60000, "DND-DM");
  if (tsDnodeTmr == NULL) {
    dError("failed to init dnode timer");
    dnodeCleanupMgmt();
S
[TD-17]  
slguan 已提交
154 155
    return -1;
  }
S
slguan 已提交
156

J
jtao1735 已提交
157 158
  taosTmrReset(dnodeSendStatusMsg, 500, NULL, tsDnodeTmr, &tsStatusTimer);
  
159
  dInfo("dnode mgmt is initialized");
J
jtao1735 已提交
160
 
S
[TD-17]  
slguan 已提交
161
  return TSDB_CODE_SUCCESS;
S
#1177  
slguan 已提交
162 163
}

S
slguan 已提交
164
void dnodeCleanupMgmt() {
J
jtao1735 已提交
165 166 167 168 169 170 171 172 173 174
  if (tsStatusTimer != NULL) {
    taosTmrStopA(&tsStatusTimer);
    tsStatusTimer = NULL;
  }

  if (tsDnodeTmr != NULL) {
    taosTmrCleanUp(tsDnodeTmr);
    tsDnodeTmr = NULL;
  }

175
  dnodeCloseVnodes();
176 177 178 179 180 181 182 183 184

  if (tsMgmtQset) taosQsetThreadResume(tsMgmtQset);
  if (tsQthread) pthread_join(tsQthread, NULL);

  if (tsMgmtQueue) taosCloseQueue(tsMgmtQueue);
  if (tsMgmtQset) taosCloseQset(tsMgmtQset);
  tsMgmtQset = NULL;
  tsMgmtQueue = NULL;

185
  vnodeCleanupResources();
S
#1177  
slguan 已提交
186 187
}

188 189
void dnodeDispatchToMgmtQueue(SRpcMsg *pMsg) {
  void *item;
S
slguan 已提交
190

191
  item = taosAllocateQitem(sizeof(SRpcMsg));
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
192 193 194 195
  if (item) {
    memcpy(item, pMsg, sizeof(SRpcMsg));
    taosWriteQitem(tsMgmtQueue, 1, item);
  } else {
196 197 198 199 200 201
    SRpcMsg rsp = {
      .handle = pMsg->handle,
      .pCont  = NULL,
      .code   = TSDB_CODE_DND_OUT_OF_MEMORY
    };
    
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
202 203 204
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
  }
205 206 207 208
}

static void *dnodeProcessMgmtQueue(void *param) {
  SRpcMsg *pMsg;
209
  SRpcMsg  rsp = {0};
210
  int      type;
211
  void *   handle;
212 213 214

  while (1) {
    if (taosReadQitemFromQset(tsMgmtQset, &type, (void **) &pMsg, &handle) == 0) {
215
      dDebug("dnode mgmt got no message from qset, exit ...");
216 217 218
      break;
    }

219
    dDebug("%p, msg:%s will be processed", pMsg->ahandle, taosMsg[pMsg->msgType]);    
220 221 222
    if (dnodeProcessMgmtMsgFp[pMsg->msgType]) {
      rsp.code = (*dnodeProcessMgmtMsgFp[pMsg->msgType])(pMsg);
    } else {
223
      rsp.code = TSDB_CODE_DND_MSG_NOT_PROCESSED;
224 225 226 227 228 229 230 231 232
    }

    rsp.handle = pMsg->handle;
    rsp.pCont  = NULL;
    rpcSendResponse(&rsp);

    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }
S
slguan 已提交
233

234
  return NULL;
S
slguan 已提交
235
}
S
slguan 已提交
236

237
static int32_t dnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes) {
S
slguan 已提交
238 239
  DIR *dir = opendir(tsVnodeDir);
  if (dir == NULL) {
240
    return TSDB_CODE_DND_NO_WRITE_ACCESS;
S
slguan 已提交
241 242
  }

243
  *numOfVnodes = 0;
S
slguan 已提交
244 245 246 247 248 249 250 251
  struct dirent *de = NULL;
  while ((de = readdir(dir)) != NULL) {
    if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue;
    if (de->d_type & DT_DIR) {
      if (strncmp("vnode", de->d_name, 5) != 0) continue;
      int32_t vnode = atoi(de->d_name + 5);
      if (vnode == 0) continue;

252
      (*numOfVnodes)++;
253 254 255 256 257 258 259

      if (*numOfVnodes >= TSDB_MAX_VNODES) {
        dError("vgId:%d, too many vnode directory in disk, exist:%d max:%d", vnode, *numOfVnodes, TSDB_MAX_VNODES);
        continue;
      } else {
        vnodeList[*numOfVnodes - 1] = vnode;
      }
S
slguan 已提交
260 261 262 263
    }
  }
  closedir(dir);

264
  return TSDB_CODE_SUCCESS;
265 266
}

267 268
static void *dnodeOpenVnode(void *param) {
  SOpenVnodeThread *pThread = param;
S
Shengliang Guan 已提交
269 270
  char vnodeDir[TSDB_FILENAME_LEN * 3];

271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
  dDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum);

  for (int32_t v = 0; v < pThread->vnodeNum; ++v) {
    int32_t vgId = pThread->vnodeList[v];
    snprintf(vnodeDir, TSDB_FILENAME_LEN * 3, "%s/vnode%d", tsVnodeDir, vgId);
    if (vnodeOpen(vgId, vnodeDir) < 0) {
      dError("vgId:%d, failed to open vnode by thread:%d", vgId, pThread->threadIndex);
      pThread->failed++;
    } else {
      dDebug("vgId:%d, is openned by thread:%d", vgId, pThread->threadIndex);
      pThread->opened++;
    }
  }

  dDebug("thread:%d, total vnodes:%d, openned:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened,
         pThread->failed);
  return NULL;
}

static int32_t dnodeOpenVnodes() {
S
Shengliang Guan 已提交
291 292
  int32_t vnodeList[TSDB_MAX_VNODES] = {0};
  int32_t numOfVnodes = 0;
293
  int32_t status = dnodeGetVnodeList(vnodeList, &numOfVnodes);
294 295

  if (status != TSDB_CODE_SUCCESS) {
296
    dInfo("get dnode list failed");
297 298
    return status;
  }
299

300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
  int32_t threadNum = tsNumOfCores;
  int32_t vnodesPerThread = numOfVnodes / threadNum + 1;
  SOpenVnodeThread *threads = calloc(threadNum, sizeof(SOpenVnodeThread));
  for (int32_t t = 0; t < threadNum; ++t) {
    threads[t].threadIndex = t;
    threads[t].vnodeList = calloc(vnodesPerThread, sizeof(int32_t));
  }

  for (int32_t v = 0; v < numOfVnodes; ++v) {
    int32_t t = v % threadNum;
    SOpenVnodeThread *pThread = &threads[t];
    pThread->vnodeList[pThread->vnodeNum++] = vnodeList[v];
  }

  dDebug("start %d threads to open %d vnodes", threadNum, numOfVnodes);

  for (int32_t t = 0; t < threadNum; ++t) {
    SOpenVnodeThread *pThread = &threads[t];
    if (pThread->vnodeNum == 0) continue;

    pthread_attr_t thAttr;
    pthread_attr_init(&thAttr);
    pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE);
    if (pthread_create(&pThread->thread, &thAttr, dnodeOpenVnode, pThread) != 0) {
      dError("thread:%d, failed to create thread to open vnode, reason:%s", pThread->threadIndex, strerror(errno));
    }

    pthread_attr_destroy(&thAttr);
  }

  int32_t openVnodes = 0;
  int32_t failedVnodes = 0;
  for (int32_t t = 0; t < threadNum; ++t) {
    SOpenVnodeThread *pThread = &threads[t];
    if (pThread->vnodeNum > 0 && pThread->thread) {
      pthread_join(pThread->thread, NULL);
    }
    openVnodes += pThread->opened;
    failedVnodes += pThread->failed;
    free(pThread->vnodeList);
S
slguan 已提交
340
  }
341

342
  free(threads);
343 344
  dInfo("there are total vnodes:%d, openned:%d failed:%d", numOfVnodes, openVnodes, failedVnodes);

S
slguan 已提交
345
  return TSDB_CODE_SUCCESS;
346 347
}

B
Bomin Zhang 已提交
348
void dnodeStartStream() {
S
Shengliang Guan 已提交
349
  int32_t vnodeList[TSDB_MAX_VNODES] = {0};
B
Bomin Zhang 已提交
350
  int32_t numOfVnodes = 0;
351
  int32_t status = vnodeGetVnodeList(vnodeList, &numOfVnodes);
B
Bomin Zhang 已提交
352 353

  if (status != TSDB_CODE_SUCCESS) {
354
    dInfo("get dnode list failed");
B
Bomin Zhang 已提交
355 356 357 358 359 360 361
    return;
  }

  for (int32_t i = 0; i < numOfVnodes; ++i) {
    vnodeStartStream(vnodeList[i]);
  }

362
  dInfo("streams started");
B
Bomin Zhang 已提交
363 364
}

365
static void dnodeCloseVnodes() {
S
Shengliang Guan 已提交
366
  int32_t vnodeList[TSDB_MAX_VNODES]= {0};
367 368 369
  int32_t numOfVnodes;
  int32_t status;

370
  status = vnodeGetVnodeList(vnodeList, &numOfVnodes);
371 372

  if (status != TSDB_CODE_SUCCESS) {
373
    dInfo("get dnode list failed");
374 375
    return;
  }
S
slguan 已提交
376 377 378 379 380

  for (int32_t i = 0; i < numOfVnodes; ++i) {
    vnodeClose(vnodeList[i]);
  }

381
  dInfo("total vnodes:%d are all closed", numOfVnodes);
H
hzcheng 已提交
382 383
}

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
384
static int32_t dnodeProcessCreateVnodeMsg(SRpcMsg *rpcMsg) {
S
slguan 已提交
385
  SMDCreateVnodeMsg *pCreate = rpcMsg->pCont;
S
slguan 已提交
386
  pCreate->cfg.vgId                = htonl(pCreate->cfg.vgId);
S
slguan 已提交
387
  pCreate->cfg.cfgVersion          = htonl(pCreate->cfg.cfgVersion);
S
slguan 已提交
388
  pCreate->cfg.maxTables           = htonl(pCreate->cfg.maxTables);
S
slguan 已提交
389 390
  pCreate->cfg.cacheBlockSize      = htonl(pCreate->cfg.cacheBlockSize);
  pCreate->cfg.totalBlocks         = htonl(pCreate->cfg.totalBlocks);
S
slguan 已提交
391 392 393 394
  pCreate->cfg.daysPerFile         = htonl(pCreate->cfg.daysPerFile);
  pCreate->cfg.daysToKeep1         = htonl(pCreate->cfg.daysToKeep1);
  pCreate->cfg.daysToKeep2         = htonl(pCreate->cfg.daysToKeep2);
  pCreate->cfg.daysToKeep          = htonl(pCreate->cfg.daysToKeep);
S
slguan 已提交
395 396
  pCreate->cfg.minRowsPerFileBlock = htonl(pCreate->cfg.minRowsPerFileBlock);
  pCreate->cfg.maxRowsPerFileBlock = htonl(pCreate->cfg.maxRowsPerFileBlock);
S
slguan 已提交
397 398
  pCreate->cfg.commitTime          = htonl(pCreate->cfg.commitTime);

399
  for (int32_t j = 0; j < pCreate->cfg.replications; ++j) {
S
slguan 已提交
400
    pCreate->nodes[j].nodeId = htonl(pCreate->nodes[j].nodeId);
401
  }
402

S
slguan 已提交
403 404 405 406 407 408 409 410
  void *pVnode = vnodeAccquireVnode(pCreate->cfg.vgId);
  if (pVnode != NULL) {
    int32_t code = vnodeAlter(pVnode, pCreate);
    vnodeRelease(pVnode);
    return code;
  } else {
    return vnodeCreate(pCreate);
  }
S
slguan 已提交
411 412
}

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
413
static int32_t dnodeProcessDropVnodeMsg(SRpcMsg *rpcMsg) {
S
slguan 已提交
414
  SMDDropVnodeMsg *pDrop = rpcMsg->pCont;
S
slguan 已提交
415
  pDrop->vgId = htonl(pDrop->vgId);
S
slguan 已提交
416

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
417
  return vnodeDrop(pDrop->vgId);
H
hzcheng 已提交
418 419
}

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
420
static int32_t dnodeProcessAlterStreamMsg(SRpcMsg *pMsg) {
421
//  SMDAlterStreamMsg *pStream = pCont;
S
slguan 已提交
422 423 424 425 426 427 428
//  pStream->uid    = htobe64(pStream->uid);
//  pStream->stime  = htobe64(pStream->stime);
//  pStream->vnode  = htonl(pStream->vnode);
//  pStream->sid    = htonl(pStream->sid);
//  pStream->status = htonl(pStream->status);
//
//  int32_t code = dnodeCreateStream(pStream);
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
429 430

  return 0;
S
slguan 已提交
431 432
}

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
433
static int32_t dnodeProcessConfigDnodeMsg(SRpcMsg *pMsg) {
S
slguan 已提交
434
  SMDCfgDnodeMsg *pCfg = (SMDCfgDnodeMsg *)pMsg->pCont;
S
slguan 已提交
435
  return taosCfgDynamicOptions(pCfg->config);
S
slguan 已提交
436
}
J
jtao1735 已提交
437

438
void dnodeUpdateMnodeIpSetForPeer(SRpcIpSet *pIpSet) {
439
  dInfo("mnode IP list for is changed, numOfIps:%d inUse:%d", pIpSet->numOfIps, pIpSet->inUse);
S
Shengliang Guan 已提交
440
  for (int i = 0; i < pIpSet->numOfIps; ++i) {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
441
    pIpSet->port[i] -= TSDB_PORT_DNODEDNODE;
442
    dInfo("mnode index:%d %s:%u", i, pIpSet->fqdn[i], pIpSet->port[i])
S
Shengliang Guan 已提交
443 444
  }

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
445
  tsDMnodeIpSet = *pIpSet;
J
jtao1735 已提交
446 447
}

448
void dnodeGetMnodeIpSetForPeer(void *ipSetRaw) {
J
jtao1735 已提交
449
  SRpcIpSet *ipSet = ipSetRaw;
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
450 451 452 453
  *ipSet = tsDMnodeIpSet;

  for (int i=0; i<ipSet->numOfIps; ++i) 
    ipSet->port[i] += TSDB_PORT_DNODEDNODE;
454 455 456 457
}

void dnodeGetMnodeIpSetForShell(void *ipSetRaw) {
  SRpcIpSet *ipSet = ipSetRaw;
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
458
  *ipSet = tsDMnodeIpSet;
J
jtao1735 已提交
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
}

static void dnodeProcessStatusRsp(SRpcMsg *pMsg) {
  if (pMsg->code != TSDB_CODE_SUCCESS) {
    dError("status rsp is received, error:%s", tstrerror(pMsg->code));
    taosTmrReset(dnodeSendStatusMsg, tsStatusInterval * 1000, NULL, tsDnodeTmr, &tsStatusTimer);
    return;
  }

  SDMStatusRsp *pStatusRsp = pMsg->pCont;
  SDMMnodeInfos *pMnodes = &pStatusRsp->mnodes;
  if (pMnodes->nodeNum <= 0) {
    dError("status msg is invalid, num of ips is %d", pMnodes->nodeNum);
    taosTmrReset(dnodeSendStatusMsg, tsStatusInterval * 1000, NULL, tsDnodeTmr, &tsStatusTimer);
    return;
  }

  SDMDnodeCfg *pCfg = &pStatusRsp->dnodeCfg;
  pCfg->numOfVnodes  = htonl(pCfg->numOfVnodes);
  pCfg->moduleStatus = htonl(pCfg->moduleStatus);
  pCfg->dnodeId      = htonl(pCfg->dnodeId);

  for (int32_t i = 0; i < pMnodes->nodeNum; ++i) {
    SDMMnodeInfo *pMnodeInfo = &pMnodes->nodeInfos[i];
    pMnodeInfo->nodeId   = htonl(pMnodeInfo->nodeId);
  }

486
  vnodeSetAccess(pStatusRsp->vgAccess, pCfg->numOfVnodes);
J
jtao1735 已提交
487 488
  dnodeProcessModuleStatus(pCfg->moduleStatus);
  dnodeUpdateDnodeCfg(pCfg);
489

J
jtao1735 已提交
490 491 492 493
  dnodeUpdateMnodeInfos(pMnodes);
  taosTmrReset(dnodeSendStatusMsg, tsStatusInterval * 1000, NULL, tsDnodeTmr, &tsStatusTimer);
}

494 495 496 497 498 499 500 501 502
static bool dnodeCheckMnodeInfos(SDMMnodeInfos *pMnodes) {
  if (pMnodes->nodeNum <= 0 || pMnodes->nodeNum > 3) {
    dError("invalid mnode infos, num:%d", pMnodes->nodeNum);
    return false;
  }

  for (int32_t i = 0; i < pMnodes->nodeNum; ++i) {
    SDMMnodeInfo *pMnodeInfo = &pMnodes->nodeInfos[i];
    if (pMnodeInfo->nodeId <= 0 || strlen(pMnodeInfo->nodeEp) <= 5) {
S
Shengliang Guan 已提交
503
      dError("invalid mnode info:%d, nodeId:%d nodeEp:%s", i, pMnodeInfo->nodeId, pMnodeInfo->nodeEp);
504 505 506 507 508 509 510
      return false;
    }
  }

  return true;
}

J
jtao1735 已提交
511
static void dnodeUpdateMnodeInfos(SDMMnodeInfos *pMnodes) {
512 513
  bool mnodesChanged = (memcmp(&tsDMnodeInfos, pMnodes, sizeof(SDMMnodeInfos)) != 0);
  bool mnodesNotInit = (tsDMnodeInfos.nodeNum == 0);
J
jtao1735 已提交
514 515
  if (!(mnodesChanged || mnodesNotInit)) return;

516 517
  if (!dnodeCheckMnodeInfos(pMnodes)) return;

518
  memcpy(&tsDMnodeInfos, pMnodes, sizeof(SDMMnodeInfos));
519
  dInfo("mnode infos is changed, nodeNum:%d inUse:%d", tsDMnodeInfos.nodeNum, tsDMnodeInfos.inUse);
520
  for (int32_t i = 0; i < tsDMnodeInfos.nodeNum; i++) {
521
    dInfo("mnode index:%d, %s", tsDMnodeInfos.nodeInfos[i].nodeId, tsDMnodeInfos.nodeInfos[i].nodeEp);
522
  }
J
jtao1735 已提交
523

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
524 525
  tsDMnodeIpSet.inUse = tsDMnodeInfos.inUse;
  tsDMnodeIpSet.numOfIps = tsDMnodeInfos.nodeNum;
526
  for (int32_t i = 0; i < tsDMnodeInfos.nodeNum; i++) {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
527
    taosGetFqdnPortFromEp(tsDMnodeInfos.nodeInfos[i].nodeEp, tsDMnodeIpSet.fqdn[i], &tsDMnodeIpSet.port[i]);
J
jtao1735 已提交
528 529 530 531 532 533 534
  }

  dnodeSaveMnodeInfos();
  sdbUpdateSync();
}

static bool dnodeReadMnodeInfos() {
H
Hui Li 已提交
535 536
  char ipFile[TSDB_FILENAME_LEN*2] = {0};
  
S
Shengliang Guan 已提交
537
  sprintf(ipFile, "%s/mnodeIpList.json", tsDnodeDir);
J
jtao1735 已提交
538 539
  FILE *fp = fopen(ipFile, "r");
  if (!fp) {
540
    dDebug("failed to read mnodeIpList.json, file not exist");
J
jtao1735 已提交
541 542 543 544 545 546 547 548 549 550
    return false;
  }

  bool  ret = false;
  int   maxLen = 2000;
  char *content = calloc(1, maxLen + 1);
  int   len = fread(content, 1, maxLen, fp);
  if (len <= 0) {
    free(content);
    fclose(fp);
S
Shengliang Guan 已提交
551
    dError("failed to read mnodeIpList.json, content is null");
J
jtao1735 已提交
552 553 554
    return false;
  }

555
  content[len] = 0;
J
jtao1735 已提交
556 557
  cJSON* root = cJSON_Parse(content);
  if (root == NULL) {
S
Shengliang Guan 已提交
558
    dError("failed to read mnodeIpList.json, invalid json format");
J
jtao1735 已提交
559 560 561 562 563
    goto PARSE_OVER;
  }

  cJSON* inUse = cJSON_GetObjectItem(root, "inUse");
  if (!inUse || inUse->type != cJSON_Number) {
S
Shengliang Guan 已提交
564
    dError("failed to read mnodeIpList.json, inUse not found");
J
jtao1735 已提交
565 566
    goto PARSE_OVER;
  }
567
  tsDMnodeInfos.inUse = inUse->valueint;
J
jtao1735 已提交
568 569 570

  cJSON* nodeNum = cJSON_GetObjectItem(root, "nodeNum");
  if (!nodeNum || nodeNum->type != cJSON_Number) {
S
Shengliang Guan 已提交
571
    dError("failed to read mnodeIpList.json, nodeNum not found");
J
jtao1735 已提交
572 573
    goto PARSE_OVER;
  }
574
  tsDMnodeInfos.nodeNum = nodeNum->valueint;
J
jtao1735 已提交
575 576 577

  cJSON* nodeInfos = cJSON_GetObjectItem(root, "nodeInfos");
  if (!nodeInfos || nodeInfos->type != cJSON_Array) {
S
Shengliang Guan 已提交
578
    dError("failed to read mnodeIpList.json, nodeInfos not found");
J
jtao1735 已提交
579 580 581 582
    goto PARSE_OVER;
  }

  int size = cJSON_GetArraySize(nodeInfos);
583
  if (size != tsDMnodeInfos.nodeNum) {
S
Shengliang Guan 已提交
584
    dError("failed to read mnodeIpList.json, nodeInfos size not matched");
J
jtao1735 已提交
585 586 587 588 589 590 591 592 593
    goto PARSE_OVER;
  }

  for (int i = 0; i < size; ++i) {
    cJSON* nodeInfo = cJSON_GetArrayItem(nodeInfos, i);
    if (nodeInfo == NULL) continue;

    cJSON *nodeId = cJSON_GetObjectItem(nodeInfo, "nodeId");
    if (!nodeId || nodeId->type != cJSON_Number) {
S
Shengliang Guan 已提交
594
      dError("failed to read mnodeIpList.json, nodeId not found");
J
jtao1735 已提交
595 596
      goto PARSE_OVER;
    }
597
    tsDMnodeInfos.nodeInfos[i].nodeId = nodeId->valueint;
J
jtao1735 已提交
598 599 600

    cJSON *nodeEp = cJSON_GetObjectItem(nodeInfo, "nodeEp");
    if (!nodeEp || nodeEp->type != cJSON_String || nodeEp->valuestring == NULL) {
S
Shengliang Guan 已提交
601
      dError("failed to read mnodeIpList.json, nodeName not found");
J
jtao1735 已提交
602 603
      goto PARSE_OVER;
    }
604
    strncpy(tsDMnodeInfos.nodeInfos[i].nodeEp, nodeEp->valuestring, TSDB_EP_LEN);
J
jtao1735 已提交
605 606 607 608
 }

  ret = true;

609
  dInfo("read mnode iplist successed, numOfIps:%d inUse:%d", tsDMnodeInfos.nodeNum, tsDMnodeInfos.inUse);
610
  for (int32_t i = 0; i < tsDMnodeInfos.nodeNum; i++) {
611
    dInfo("mnode:%d, %s", tsDMnodeInfos.nodeInfos[i].nodeId, tsDMnodeInfos.nodeInfos[i].nodeEp);
J
jtao1735 已提交
612 613 614 615 616 617 618 619 620 621 622
  }

PARSE_OVER:
  free(content);
  cJSON_Delete(root);
  fclose(fp);
  return ret;
}

static void dnodeSaveMnodeInfos() {
  char ipFile[TSDB_FILENAME_LEN] = {0};
S
Shengliang Guan 已提交
623
  sprintf(ipFile, "%s/mnodeIpList.json", tsDnodeDir);
J
jtao1735 已提交
624 625 626 627 628 629 630 631
  FILE *fp = fopen(ipFile, "w");
  if (!fp) return;

  int32_t len = 0;
  int32_t maxLen = 2000;
  char *  content = calloc(1, maxLen + 1);

  len += snprintf(content + len, maxLen - len, "{\n");
632 633
  len += snprintf(content + len, maxLen - len, "  \"inUse\": %d,\n", tsDMnodeInfos.inUse);
  len += snprintf(content + len, maxLen - len, "  \"nodeNum\": %d,\n", tsDMnodeInfos.nodeNum);
J
jtao1735 已提交
634
  len += snprintf(content + len, maxLen - len, "  \"nodeInfos\": [{\n");
635 636 637 638
  for (int32_t i = 0; i < tsDMnodeInfos.nodeNum; i++) {
    len += snprintf(content + len, maxLen - len, "    \"nodeId\": %d,\n", tsDMnodeInfos.nodeInfos[i].nodeId);
    len += snprintf(content + len, maxLen - len, "    \"nodeEp\": \"%s\"\n", tsDMnodeInfos.nodeInfos[i].nodeEp);
    if (i < tsDMnodeInfos.nodeNum -1) {
J
jtao1735 已提交
639 640 641 642 643 644 645 646
      len += snprintf(content + len, maxLen - len, "  },{\n");  
    } else {
      len += snprintf(content + len, maxLen - len, "  }]\n");  
    }
  }
  len += snprintf(content + len, maxLen - len, "}\n"); 

  fwrite(content, 1, len, fp);
H
Hui Li 已提交
647
  fflush(fp);
J
jtao1735 已提交
648 649 650
  fclose(fp);
  free(content);
  
651
  dInfo("save mnode iplist successed");
J
jtao1735 已提交
652 653 654
}

char *dnodeGetMnodeMasterEp() {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
655
  return tsDMnodeInfos.nodeInfos[tsDMnodeIpSet.inUse].nodeEp;
J
jtao1735 已提交
656 657 658
}

void* dnodeGetMnodeInfos() {
659
  return &tsDMnodeInfos;
J
jtao1735 已提交
660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
}

static void dnodeSendStatusMsg(void *handle, void *tmrId) {
  if (tsDnodeTmr == NULL) {
    dError("dnode timer is already released");
    return;
  }

  if (tsStatusTimer == NULL) {
    taosTmrReset(dnodeSendStatusMsg, tsStatusInterval * 1000, NULL, tsDnodeTmr, &tsStatusTimer);
    dError("failed to start status timer");
    return;
  }

  int32_t contLen = sizeof(SDMStatusMsg) + TSDB_MAX_VNODES * sizeof(SVnodeLoad);
  SDMStatusMsg *pStatus = rpcMallocCont(contLen);
  if (pStatus == NULL) {
    taosTmrReset(dnodeSendStatusMsg, tsStatusInterval * 1000, NULL, tsDnodeTmr, &tsStatusTimer);
    dError("failed to malloc status message");
    return;
  }

  //strcpy(pStatus->dnodeName, tsDnodeName);
  pStatus->version          = htonl(tsVersion);
  pStatus->dnodeId          = htonl(tsDnodeCfg.dnodeId);
  strcpy(pStatus->dnodeEp, tsLocalEp);
  pStatus->lastReboot       = htonl(tsRebootTime);
  pStatus->numOfTotalVnodes = htons((uint16_t) tsNumOfTotalVnodes);
  pStatus->numOfCores       = htons((uint16_t) tsNumOfCores);
  pStatus->diskAvailable    = tsAvailDataDirGB;
  pStatus->alternativeRole  = (uint8_t) tsAlternativeRole;
H
Hui Li 已提交
691 692

  // fill cluster cfg parameters
H
Hui Li 已提交
693 694 695 696 697 698 699 700
  pStatus->clusterCfg.numOfMnodes        = tsNumOfMnodes;
  pStatus->clusterCfg.mnodeEqualVnodeNum = tsMnodeEqualVnodeNum;
  pStatus->clusterCfg.offlineThreshold   = tsOfflineThreshold;
  pStatus->clusterCfg.statusInterval     = tsStatusInterval;
  strcpy(pStatus->clusterCfg.arbitrator, tsArbitrator);
  strcpy(pStatus->clusterCfg.timezone, tsTimezone);
  strcpy(pStatus->clusterCfg.locale, tsLocale);
  strcpy(pStatus->clusterCfg.charset, tsCharset);  
J
jtao1735 已提交
701 702 703 704 705 706 707 708 709 710 711
  
  vnodeBuildStatusMsg(pStatus);
  contLen = sizeof(SDMStatusMsg) + pStatus->openVnodes * sizeof(SVnodeLoad);
  pStatus->openVnodes = htons(pStatus->openVnodes);
  
  SRpcMsg rpcMsg = {
    .pCont   = pStatus,
    .contLen = contLen,
    .msgType = TSDB_MSG_TYPE_DM_STATUS
  };

陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
712 713 714
  SRpcIpSet ipSet;
  dnodeGetMnodeIpSetForPeer(&ipSet);
  dnodeSendMsgToDnode(&ipSet, &rpcMsg);
J
jtao1735 已提交
715 716 717
}

static bool dnodeReadDnodeCfg() {
H
Hui Li 已提交
718 719
  char dnodeCfgFile[TSDB_FILENAME_LEN*2] = {0};
  
J
jtao1735 已提交
720 721 722 723
  sprintf(dnodeCfgFile, "%s/dnodeCfg.json", tsDnodeDir);

  FILE *fp = fopen(dnodeCfgFile, "r");
  if (!fp) {
724
    dDebug("failed to read dnodeCfg.json, file not exist");
J
jtao1735 已提交
725 726 727 728 729 730 731 732 733 734 735 736 737 738
    return false;
  }

  bool  ret = false;
  int   maxLen = 100;
  char *content = calloc(1, maxLen + 1);
  int   len = fread(content, 1, maxLen, fp);
  if (len <= 0) {
    free(content);
    fclose(fp);
    dError("failed to read dnodeCfg.json, content is null");
    return false;
  }

739
  content[len] = 0;
J
jtao1735 已提交
740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
  cJSON* root = cJSON_Parse(content);
  if (root == NULL) {
    dError("failed to read dnodeCfg.json, invalid json format");
    goto PARSE_CFG_OVER;
  }

  cJSON* dnodeId = cJSON_GetObjectItem(root, "dnodeId");
  if (!dnodeId || dnodeId->type != cJSON_Number) {
    dError("failed to read dnodeCfg.json, dnodeId not found");
    goto PARSE_CFG_OVER;
  }
  tsDnodeCfg.dnodeId = dnodeId->valueint;

  ret = true;

755
  dInfo("read numOfVnodes successed, dnodeId:%d", tsDnodeCfg.dnodeId);
J
jtao1735 已提交
756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779

PARSE_CFG_OVER:
  free(content);
  cJSON_Delete(root);
  fclose(fp);
  return ret;
}

static void dnodeSaveDnodeCfg() {
  char dnodeCfgFile[TSDB_FILENAME_LEN] = {0};
  sprintf(dnodeCfgFile, "%s/dnodeCfg.json", tsDnodeDir);

  FILE *fp = fopen(dnodeCfgFile, "w");
  if (!fp) return;

  int32_t len = 0;
  int32_t maxLen = 100;
  char *  content = calloc(1, maxLen + 1);

  len += snprintf(content + len, maxLen - len, "{\n");
  len += snprintf(content + len, maxLen - len, "  \"dnodeId\": %d\n", tsDnodeCfg.dnodeId);
  len += snprintf(content + len, maxLen - len, "}\n"); 

  fwrite(content, 1, len, fp);
H
Hui Li 已提交
780
  fflush(fp);
J
jtao1735 已提交
781 782 783
  fclose(fp);
  free(content);
  
784
  dInfo("save dnodeId successed");
J
jtao1735 已提交
785 786 787 788
}

void dnodeUpdateDnodeCfg(SDMDnodeCfg *pCfg) {
  if (tsDnodeCfg.dnodeId == 0) {
789
    dInfo("dnodeId is set to %d", pCfg->dnodeId);  
J
jtao1735 已提交
790 791 792 793 794 795 796 797 798
    tsDnodeCfg.dnodeId = pCfg->dnodeId;
    dnodeSaveDnodeCfg();
  }
}

int32_t dnodeGetDnodeId() {
  return tsDnodeCfg.dnodeId;
}

799
void dnodeSendRedirectMsg(SRpcMsg *rpcMsg, bool forShell) {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
800
  SRpcConnInfo connInfo = {0};
801
  rpcGetConnInfo(rpcMsg->handle, &connInfo);
S
Shengliang Guan 已提交
802 803

  SRpcIpSet ipSet = {0};
804 805 806 807 808 809
  if (forShell) {
    dnodeGetMnodeIpSetForShell(&ipSet);
  } else {
    dnodeGetMnodeIpSetForPeer(&ipSet);
  }
  
810
  dDebug("msg:%s will be redirected, dnodeIp:%s user:%s, numOfIps:%d inUse:%d", taosMsg[rpcMsg->msgType],
S
Shengliang Guan 已提交
811 812 813
         taosIpStr(connInfo.clientIp), connInfo.user, ipSet.numOfIps, ipSet.inUse);

  for (int i = 0; i < ipSet.numOfIps; ++i) {
814
    dDebug("mnode index:%d %s:%d", i, ipSet.fqdn[i], ipSet.port[i]);
S
Shengliang Guan 已提交
815 816 817
    ipSet.port[i] = htons(ipSet.port[i]);
  }

818
  rpcSendRedirectRsp(rpcMsg->handle, &ipSet);
819
}