dndVnodes.c 34.4 KB
Newer Older
S
Shengliang Guan 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http:www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
#include "dndVnodes.h"
#include "dndTransport.h"

20 21 22 23 24
typedef struct {
  int32_t  vgId;
  int32_t  vgVersion;
  int8_t   dropped;
  uint64_t dbUid;
25
  char     db[TSDB_DB_FNAME_LEN];
26 27 28
  char     path[PATH_MAX + 20];
} SWrapperCfg;

S
Shengliang Guan 已提交
29
typedef struct {
30 31 32 33 34 35 36 37 38 39 40 41 42 43
  int32_t     vgId;
  int32_t     refCount;
  int32_t     vgVersion;
  int8_t      dropped;
  int8_t      accessState;
  uint64_t    dbUid;
  char       *db;
  char       *path;
  SVnode     *pImpl;
  STaosQueue *pWriteQ;
  STaosQueue *pSyncQ;
  STaosQueue *pApplyQ;
  STaosQueue *pQueryQ;
  STaosQueue* pFetchQ;
S
Shengliang Guan 已提交
44 45 46
} SVnodeObj;

typedef struct {
47 48 49 50
  int32_t      vnodeNum;
  int32_t      opened;
  int32_t      failed;
  int32_t      threadIndex;
51
  pthread_t    thread;
52 53
  SDnode      *pDnode;
  SWrapperCfg *pCfgs;
S
Shengliang Guan 已提交
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
} SVnodeThread;

static int32_t dndInitVnodeReadWorker(SDnode *pDnode);
static int32_t dndInitVnodeWriteWorker(SDnode *pDnode);
static int32_t dndInitVnodeSyncWorker(SDnode *pDnode);
static void    dndCleanupVnodeReadWorker(SDnode *pDnode);
static void    dndCleanupVnodeWriteWorker(SDnode *pDnode);
static void    dndCleanupVnodeSyncWorker(SDnode *pDnode);
static int32_t dndAllocVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dndAllocVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dndAllocVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dndAllocVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dndAllocVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode);
static void    dndFreeVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode);
static void    dndFreeVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode);
static void    dndFreeVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode);
static void    dndFreeVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode);
static void    dndFreeVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode);

73 74
static void    dndProcessVnodeQueryQueue(SVnodeObj *pVnode, SRpcMsg *pMsg);
static void    dndProcessVnodeFetchQueue(SVnodeObj *pVnode, SRpcMsg *pMsg);
75 76 77
static void    dndProcessVnodeWriteQueue(SVnodeObj *pVnode, STaosQall *qall, int32_t numOfMsgs);
static void    dndProcessVnodeApplyQueue(SVnodeObj *pVnode, STaosQall *qall, int32_t numOfMsgs);
static void    dndProcessVnodeSyncQueue(SVnodeObj *pVnode, STaosQall *qall, int32_t numOfMsgs);
S
Shengliang Guan 已提交
78 79 80 81
void           dndProcessVnodeQueryMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void           dndProcessVnodeFetchMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void           dndProcessVnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void           dndProcessVnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
82
static int32_t dndPutMsgIntoVnodeApplyQueue(SDnode *pDnode, int32_t vgId, SRpcMsg *pMsg);
S
Shengliang Guan 已提交
83

84
static SVnodeObj  *dndAcquireVnode(SDnode *pDnode, int32_t vgId);
S
Shengliang Guan 已提交
85
static void        dndReleaseVnode(SDnode *pDnode, SVnodeObj *pVnode);
86 87
static int32_t     dndOpenVnode(SDnode *pDnode, SWrapperCfg *pCfg, SVnode *pImpl);
static void        dndCloseVnode(SDnode *pDnode, SVnodeObj *pVnode);
S
Shengliang Guan 已提交
88
static SVnodeObj **dndGetVnodesFromHash(SDnode *pDnode, int32_t *numOfVnodes);
89
static int32_t     dndGetVnodesFromFile(SDnode *pDnode, SWrapperCfg **ppCfgs, int32_t *numOfVnodes);
S
Shengliang Guan 已提交
90 91 92 93 94 95 96
static int32_t     dndWriteVnodesToFile(SDnode *pDnode);

static int32_t dndOpenVnodes(SDnode *pDnode);
static void    dndCloseVnodes(SDnode *pDnode);

static SVnodeObj *dndAcquireVnode(SDnode *pDnode, int32_t vgId) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
97
  SVnodeObj   *pVnode = NULL;
S
Shengliang Guan 已提交
98 99 100 101 102 103 104 105 106 107 108
  int32_t      refCount = 0;

  taosRLockLatch(&pMgmt->latch);
  taosHashGetClone(pMgmt->hash, &vgId, sizeof(int32_t), (void *)&pVnode);
  if (pVnode == NULL) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
  } else {
    refCount = atomic_add_fetch_32(&pVnode->refCount, 1);
  }
  taosRUnLockLatch(&pMgmt->latch);

109 110 111 112
  if (pVnode != NULL) {
    dTrace("vgId:%d, acquire vnode, refCount:%d", pVnode->vgId, refCount);
  }

S
Shengliang Guan 已提交
113 114 115 116
  return pVnode;
}

static void dndReleaseVnode(SDnode *pDnode, SVnodeObj *pVnode) {
117 118
  if (pVnode == NULL) return;

S
Shengliang Guan 已提交
119 120 121
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;

  taosRLockLatch(&pMgmt->latch);
122
  int32_t refCount = atomic_sub_fetch_32(&pVnode->refCount, 1);
S
Shengliang Guan 已提交
123 124
  taosRUnLockLatch(&pMgmt->latch);

125
  dTrace("vgId:%d, release vnode, refCount:%d", pVnode->vgId, refCount);
S
Shengliang Guan 已提交
126 127
}

128
static int32_t dndOpenVnode(SDnode *pDnode, SWrapperCfg *pCfg, SVnode *pImpl) {
S
Shengliang Guan 已提交
129
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
130
  SVnodeObj   *pVnode = calloc(1, sizeof(SVnodeObj));
S
Shengliang Guan 已提交
131 132 133 134 135
  if (pVnode == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

136 137
  pVnode->vgId = pCfg->vgId;
  pVnode->refCount = 1;
S
Shengliang Guan 已提交
138 139 140
  pVnode->dropped = 0;
  pVnode->accessState = TSDB_VN_ALL_ACCCESS;
  pVnode->pImpl = pImpl;
141 142 143 144
  pVnode->vgVersion = pCfg->vgVersion;
  pVnode->dbUid = pCfg->dbUid;
  pVnode->db = tstrdup(pCfg->db);
  pVnode->path = tstrdup(pCfg->path);
S
Shengliang Guan 已提交
145

146
  if (pVnode->path == NULL || pVnode->db == NULL) {
S
Shengliang Guan 已提交
147 148 149 150
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

S
Shengliang Guan 已提交
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
  if (dndAllocVnodeQueryQueue(pDnode, pVnode) != 0) {
    return -1;
  }

  if (dndAllocVnodeFetchQueue(pDnode, pVnode) != 0) {
    return -1;
  }

  if (dndAllocVnodeWriteQueue(pDnode, pVnode) != 0) {
    return -1;
  }

  if (dndAllocVnodeApplyQueue(pDnode, pVnode) != 0) {
    return -1;
  }

  if (dndAllocVnodeSyncQueue(pDnode, pVnode) != 0) {
    return -1;
  }

  taosWLockLatch(&pMgmt->latch);
172
  int32_t code = taosHashPut(pMgmt->hash, &pVnode->vgId, sizeof(int32_t), &pVnode, sizeof(SVnodeObj *));
S
Shengliang Guan 已提交
173 174 175 176 177 178 179 180
  taosWUnLockLatch(&pMgmt->latch);

  if (code != 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
  }
  return code;
}

181
static void dndCloseVnode(SDnode *pDnode, SVnodeObj *pVnode) {
S
Shengliang Guan 已提交
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  taosWLockLatch(&pMgmt->latch);
  taosHashRemove(pMgmt->hash, &pVnode->vgId, sizeof(int32_t));
  taosWUnLockLatch(&pMgmt->latch);

  dndReleaseVnode(pDnode, pVnode);
  while (pVnode->refCount > 0) taosMsleep(10);
  while (!taosQueueEmpty(pVnode->pWriteQ)) taosMsleep(10);
  while (!taosQueueEmpty(pVnode->pSyncQ)) taosMsleep(10);
  while (!taosQueueEmpty(pVnode->pApplyQ)) taosMsleep(10);
  while (!taosQueueEmpty(pVnode->pQueryQ)) taosMsleep(10);
  while (!taosQueueEmpty(pVnode->pFetchQ)) taosMsleep(10);

  dndFreeVnodeQueryQueue(pDnode, pVnode);
  dndFreeVnodeFetchQueue(pDnode, pVnode);
  dndFreeVnodeWriteQueue(pDnode, pVnode);
  dndFreeVnodeApplyQueue(pDnode, pVnode);
  dndFreeVnodeSyncQueue(pDnode, pVnode);
S
Shengliang Guan 已提交
200 201 202 203
  
  vnodeClose(pVnode->pImpl);
  pVnode->pImpl = NULL;

204 205 206
  free(pVnode->path);
  free(pVnode->db);
  free(pVnode);
S
Shengliang Guan 已提交
207 208 209 210 211 212 213 214 215 216 217 218 219
}

static SVnodeObj **dndGetVnodesFromHash(SDnode *pDnode, int32_t *numOfVnodes) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  taosRLockLatch(&pMgmt->latch);

  int32_t     num = 0;
  int32_t     size = taosHashGetSize(pMgmt->hash);
  SVnodeObj **pVnodes = calloc(size, sizeof(SVnodeObj *));

  void *pIter = taosHashIterate(pMgmt->hash, NULL);
  while (pIter) {
    SVnodeObj **ppVnode = pIter;
220 221 222 223 224
    SVnodeObj  *pVnode = *ppVnode;
    if (pVnode && num < size) {
      int32_t refCount = atomic_add_fetch_32(&pVnode->refCount, 1);
      dTrace("vgId:%d, acquire vnode, refCount:%d", pVnode->vgId, refCount);
      pVnodes[num] = (*ppVnode);
S
Shengliang Guan 已提交
225
      num++;
226 227 228
      pIter = taosHashIterate(pMgmt->hash, pIter);
    } else {
      taosHashCancelIterate(pMgmt->hash, pIter);
S
Shengliang Guan 已提交
229 230 231 232 233 234 235 236 237
    }
  }

  taosRUnLockLatch(&pMgmt->latch);
  *numOfVnodes = num;

  return pVnodes;
}

238 239 240 241 242 243 244 245 246
static int32_t dndGetVnodesFromFile(SDnode *pDnode, SWrapperCfg **ppCfgs, int32_t *numOfVnodes) {
  int32_t      code = TSDB_CODE_DND_VNODE_READ_FILE_ERROR;
  int32_t      len = 0;
  int32_t      maxLen = 30000;
  char        *content = calloc(1, maxLen + 1);
  cJSON       *root = NULL;
  FILE        *fp = NULL;
  char         file[PATH_MAX + 20] = {0};
  SWrapperCfg *pCfgs = NULL;
S
Shengliang Guan 已提交
247 248 249 250

  snprintf(file, PATH_MAX + 20, "%s/vnodes.json", pDnode->dir.vnodes);

  fp = fopen(file, "r");
S
Shengliang Guan 已提交
251
  if (fp == NULL) {
S
Shengliang Guan 已提交
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
    dDebug("file %s not exist", file);
    code = 0;
    goto PRASE_VNODE_OVER;
  }

  len = (int32_t)fread(content, 1, maxLen, fp);
  if (len <= 0) {
    dError("failed to read %s since content is null", file);
    goto PRASE_VNODE_OVER;
  }

  content[len] = 0;
  root = cJSON_Parse(content);
  if (root == NULL) {
    dError("failed to read %s since invalid json format", file);
    goto PRASE_VNODE_OVER;
  }

  cJSON *vnodes = cJSON_GetObjectItem(root, "vnodes");
  if (!vnodes || vnodes->type != cJSON_Array) {
    dError("failed to read %s since vnodes not found", file);
    goto PRASE_VNODE_OVER;
  }

  int32_t vnodesNum = cJSON_GetArraySize(vnodes);
  if (vnodesNum <= 0) {
    dError("failed to read %s since vnodes size:%d invalid", file, vnodesNum);
    goto PRASE_VNODE_OVER;
  }

282 283
  pCfgs = calloc(vnodesNum, sizeof(SWrapperCfg));
  if (pCfgs == NULL) {
S
Shengliang Guan 已提交
284 285 286 287 288
    dError("failed to read %s since out of memory", file);
    goto PRASE_VNODE_OVER;
  }

  for (int32_t i = 0; i < vnodesNum; ++i) {
289 290
    cJSON       *vnode = cJSON_GetArrayItem(vnodes, i);
    SWrapperCfg *pCfg = &pCfgs[i];
S
Shengliang Guan 已提交
291 292

    cJSON *vgId = cJSON_GetObjectItem(vnode, "vgId");
293
    if (!vgId || vgId->type != cJSON_Number) {
S
Shengliang Guan 已提交
294 295 296
      dError("failed to read %s since vgId not found", file);
      goto PRASE_VNODE_OVER;
    }
297
    pCfg->vgId = vgId->valueint;
298
    snprintf(pCfg->path, sizeof(pCfg->path), "%s/vnode%d", pDnode->dir.vnodes, pCfg->vgId);
S
Shengliang Guan 已提交
299 300

    cJSON *dropped = cJSON_GetObjectItem(vnode, "dropped");
301
    if (!dropped || dropped->type != cJSON_Number) {
S
Shengliang Guan 已提交
302 303 304
      dError("failed to read %s since dropped not found", file);
      goto PRASE_VNODE_OVER;
    }
305
    pCfg->dropped = dropped->valueint;
306 307

    cJSON *vgVersion = cJSON_GetObjectItem(vnode, "vgVersion");
308
    if (!vgVersion || vgVersion->type != cJSON_Number) {
309 310 311
      dError("failed to read %s since vgVersion not found", file);
      goto PRASE_VNODE_OVER;
    }
312
    pCfg->vgVersion = vgVersion->valueint;
313 314 315 316 317 318 319 320 321 322 323 324 325

    cJSON *dbUid = cJSON_GetObjectItem(vnode, "dbUid");
    if (!dbUid || dbUid->type != cJSON_String) {
      dError("failed to read %s since dbUid not found", file);
      goto PRASE_VNODE_OVER;
    }
    pCfg->dbUid = atoll(dbUid->valuestring);

    cJSON *db = cJSON_GetObjectItem(vnode, "db");
    if (!db || db->type != cJSON_String) {
      dError("failed to read %s since db not found", file);
      goto PRASE_VNODE_OVER;
    }
326
    tstrncpy(pCfg->db, db->valuestring, TSDB_DB_FNAME_LEN);
S
Shengliang Guan 已提交
327 328
  }

329 330
  *ppCfgs = pCfgs;
  *numOfVnodes = vnodesNum;
S
Shengliang Guan 已提交
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
  code = 0;
  dInfo("succcessed to read file %s", file);

PRASE_VNODE_OVER:
  if (content != NULL) free(content);
  if (root != NULL) cJSON_Delete(root);
  if (fp != NULL) fclose(fp);

  return code;
}

static int32_t dndWriteVnodesToFile(SDnode *pDnode) {
  char file[PATH_MAX + 20] = {0};
  char realfile[PATH_MAX + 20] = {0};
  snprintf(file, PATH_MAX + 20, "%s/vnodes.json.bak", pDnode->dir.vnodes);
  snprintf(realfile, PATH_MAX + 20, "%s/vnodes.json", pDnode->dir.vnodes);

  FILE *fp = fopen(file, "w");
349
  if (fp == NULL) {
S
Shengliang Guan 已提交
350 351 352 353 354 355 356
    terrno = TAOS_SYSTEM_ERROR(errno);
    dError("failed to write %s since %s", file, terrstr());
    return -1;
  }
  int32_t     numOfVnodes = 0;
  SVnodeObj **pVnodes = dndGetVnodesFromHash(pDnode, &numOfVnodes);

357 358 359 360
  int32_t len = 0;
  int32_t maxLen = 65536;
  char   *content = calloc(1, maxLen + 1);

S
Shengliang Guan 已提交
361
  len += snprintf(content + len, maxLen - len, "{\n");
362
  len += snprintf(content + len, maxLen - len, "  \"vnodes\": [\n");
S
Shengliang Guan 已提交
363 364
  for (int32_t i = 0; i < numOfVnodes; ++i) {
    SVnodeObj *pVnode = pVnodes[i];
365 366 367 368 369 370
    len += snprintf(content + len, maxLen - len, "    {\n");
    len += snprintf(content + len, maxLen - len, "      \"vgId\": %d,\n", pVnode->vgId);
    len += snprintf(content + len, maxLen - len, "      \"dropped\": %d,\n", pVnode->dropped);
    len += snprintf(content + len, maxLen - len, "      \"vgVersion\": %d,\n", pVnode->vgVersion);
    len += snprintf(content + len, maxLen - len, "      \"dbUid\": \"%" PRIu64 "\",\n", pVnode->dbUid);
    len += snprintf(content + len, maxLen - len, "      \"db\": \"%s\"\n", pVnode->db);
S
Shengliang Guan 已提交
371
    if (i < numOfVnodes - 1) {
372
      len += snprintf(content + len, maxLen - len, "    },\n");
S
Shengliang Guan 已提交
373
    } else {
374
      len += snprintf(content + len, maxLen - len, "    }\n");
S
Shengliang Guan 已提交
375 376
    }
  }
377
  len += snprintf(content + len, maxLen - len, "  ]\n");
S
Shengliang Guan 已提交
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
  len += snprintf(content + len, maxLen - len, "}\n");

  fwrite(content, 1, len, fp);
  taosFsyncFile(fileno(fp));
  fclose(fp);
  free(content);
  terrno = 0;

  for (int32_t i = 0; i < numOfVnodes; ++i) {
    SVnodeObj *pVnode = pVnodes[i];
    dndReleaseVnode(pDnode, pVnode);
  }

  if (pVnodes != NULL) {
    free(pVnodes);
  }

S
Shengliang Guan 已提交
395
  dDebug("successed to write %s", realfile);
S
Shengliang Guan 已提交
396 397 398 399 400
  return taosRenameFile(file, realfile);
}

static void *dnodeOpenVnodeFunc(void *param) {
  SVnodeThread *pThread = param;
401 402
  SDnode       *pDnode = pThread->pDnode;
  SVnodesMgmt  *pMgmt = &pDnode->vmgmt;
S
Shengliang Guan 已提交
403 404 405 406 407

  dDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum);
  setThreadName("open-vnodes");

  for (int32_t v = 0; v < pThread->vnodeNum; ++v) {
408
    SWrapperCfg *pCfg = &pThread->pCfgs[v];
S
Shengliang Guan 已提交
409 410

    char stepDesc[TSDB_STEP_DESC_LEN] = {0};
411
    snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to restore, %d of %d have been opened", pCfg->vgId,
S
Shengliang Guan 已提交
412 413 414
             pMgmt->openVnodes, pMgmt->totalVnodes);
    dndReportStartup(pDnode, "open-vnodes", stepDesc);

415
    SVnode *pImpl = vnodeOpen(pCfg->path, NULL);
S
Shengliang Guan 已提交
416
    if (pImpl == NULL) {
417
      dError("vgId:%d, failed to open vnode by thread:%d", pCfg->vgId, pThread->threadIndex);
S
Shengliang Guan 已提交
418 419
      pThread->failed++;
    } else {
420
      dndOpenVnode(pDnode, pCfg, pImpl);
421
      dDebug("vgId:%d, is opened by thread:%d", pCfg->vgId, pThread->threadIndex);
S
Shengliang Guan 已提交
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
      pThread->opened++;
    }

    atomic_add_fetch_32(&pMgmt->openVnodes, 1);
  }

  dDebug("thread:%d, total vnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened,
         pThread->failed);
  return NULL;
}

static int32_t dndOpenVnodes(SDnode *pDnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  taosInitRWLatch(&pMgmt->latch);

  pMgmt->hash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  if (pMgmt->hash == NULL) {
    dError("failed to init vnode hash");
S
Shengliang Guan 已提交
440
    terrno = TSDB_CODE_OUT_OF_MEMORY;
S
Shengliang Guan 已提交
441 442 443
    return -1;
  }

444 445 446
  SWrapperCfg *pCfgs = NULL;
  int32_t      numOfVnodes = 0;
  if (dndGetVnodesFromFile(pDnode, &pCfgs, &numOfVnodes) != 0) {
S
Shengliang Guan 已提交
447 448 449 450 451 452
    dInfo("failed to get vnode list from disk since %s", terrstr());
    return -1;
  }

  pMgmt->totalVnodes = numOfVnodes;

453
  int32_t threadNum = pDnode->opt.numOfCores;
S
Shengliang Guan 已提交
454 455 456 457 458
  int32_t vnodesPerThread = numOfVnodes / threadNum + 1;

  SVnodeThread *threads = calloc(threadNum, sizeof(SVnodeThread));
  for (int32_t t = 0; t < threadNum; ++t) {
    threads[t].threadIndex = t;
459
    threads[t].pDnode = pDnode;
460
    threads[t].pCfgs = calloc(vnodesPerThread, sizeof(SWrapperCfg));
S
Shengliang Guan 已提交
461 462 463 464 465
  }

  for (int32_t v = 0; v < numOfVnodes; ++v) {
    int32_t       t = v % threadNum;
    SVnodeThread *pThread = &threads[t];
466
    pThread->pCfgs[pThread->vnodeNum++] = pCfgs[v];
S
Shengliang Guan 已提交
467 468 469 470 471 472 473 474
  }

  dInfo("start %d threads to open %d vnodes", threadNum, numOfVnodes);

  for (int32_t t = 0; t < threadNum; ++t) {
    SVnodeThread *pThread = &threads[t];
    if (pThread->vnodeNum == 0) continue;

475 476 477 478
    pthread_attr_t thAttr;
    pthread_attr_init(&thAttr);
    pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE);
    if (pthread_create(&pThread->thread, &thAttr, dnodeOpenVnodeFunc, pThread) != 0) {
S
Shengliang Guan 已提交
479 480
      dError("thread:%d, failed to create thread to open vnode, reason:%s", pThread->threadIndex, strerror(errno));
    }
481 482

    pthread_attr_destroy(&thAttr);
S
Shengliang Guan 已提交
483 484 485 486
  }

  for (int32_t t = 0; t < threadNum; ++t) {
    SVnodeThread *pThread = &threads[t];
487 488 489
    if (pThread->vnodeNum > 0 && taosCheckPthreadValid(pThread->thread)) {
      pthread_join(pThread->thread, NULL);
    }
490
    free(pThread->pCfgs);
S
Shengliang Guan 已提交
491 492
  }
  free(threads);
493
  free(pCfgs);
S
Shengliang Guan 已提交
494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510

  if (pMgmt->openVnodes != pMgmt->totalVnodes) {
    dError("there are total vnodes:%d, opened:%d", pMgmt->totalVnodes, pMgmt->openVnodes);
    return -1;
  } else {
    dInfo("total vnodes:%d open successfully", pMgmt->totalVnodes);
    return 0;
  }
}

static void dndCloseVnodes(SDnode *pDnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;

  int32_t     numOfVnodes = 0;
  SVnodeObj **pVnodes = dndGetVnodesFromHash(pDnode, &numOfVnodes);

  for (int32_t i = 0; i < numOfVnodes; ++i) {
511 512
    dndReleaseVnode(pDnode, pVnodes[i]);
    dndCloseVnode(pDnode, pVnodes[i]);
S
Shengliang Guan 已提交
513 514 515 516 517 518 519 520 521 522 523 524 525 526
  }

  if (pVnodes != NULL) {
    free(pVnodes);
  }

  if (pMgmt->hash != NULL) {
    taosHashCleanup(pMgmt->hash);
    pMgmt->hash = NULL;
  }

  dInfo("total vnodes:%d are all closed", numOfVnodes);
}

527
static SCreateVnodeMsg *dndParseCreateVnodeReq(SRpcMsg *rpcMsg) {
S
Shengliang Guan 已提交
528
  SCreateVnodeMsg *pCreate = rpcMsg->pCont;
529 530 531
  pCreate->vgId = htonl(pCreate->vgId);
  pCreate->dnodeId = htonl(pCreate->dnodeId);
  pCreate->dbUid = htobe64(pCreate->dbUid);
532
  pCreate->vgVersion = htonl(pCreate->vgVersion);
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
  pCreate->cacheBlockSize = htonl(pCreate->cacheBlockSize);
  pCreate->totalBlocks = htonl(pCreate->totalBlocks);
  pCreate->daysPerFile = htonl(pCreate->daysPerFile);
  pCreate->daysToKeep0 = htonl(pCreate->daysToKeep0);
  pCreate->daysToKeep1 = htonl(pCreate->daysToKeep1);
  pCreate->daysToKeep2 = htonl(pCreate->daysToKeep2);
  pCreate->minRows = htonl(pCreate->minRows);
  pCreate->maxRows = htonl(pCreate->maxRows);
  pCreate->commitTime = htonl(pCreate->commitTime);
  pCreate->fsyncPeriod = htonl(pCreate->fsyncPeriod);
  for (int r = 0; r < pCreate->replica; ++r) {
    SReplica *pReplica = &pCreate->replicas[r];
    pReplica->id = htonl(pReplica->id);
    pReplica->port = htons(pReplica->port);
  }

549 550
  return pCreate;
}
S
Shengliang Guan 已提交
551

552
static void dndGenerateVnodeCfg(SCreateVnodeMsg *pCreate, SVnodeCfg *pCfg) {
553 554 555 556 557 558 559 560
  pCfg->wsize = pCreate->cacheBlockSize;
  pCfg->ssize = pCreate->cacheBlockSize;
  pCfg->wsize = pCreate->cacheBlockSize;
  pCfg->lsize = pCreate->cacheBlockSize;
  pCfg->isHeapAllocator = true;
  pCfg->ttl = 4;
  pCfg->keep = pCreate->daysToKeep0;
  pCfg->isWeak = true;
H
Hongze Cheng 已提交
561
  pCfg->tsdbCfg.keep = pCreate->daysToKeep0;
562 563 564 565 566 567 568 569 570 571 572
  pCfg->tsdbCfg.keep1 = pCreate->daysToKeep2;
  pCfg->tsdbCfg.keep2 = pCreate->daysToKeep0;
  pCfg->tsdbCfg.lruCacheSize = pCreate->cacheBlockSize;
  pCfg->metaCfg.lruSize = pCreate->cacheBlockSize;
  pCfg->walCfg.fsyncPeriod = pCreate->fsyncPeriod;
  pCfg->walCfg.level = pCreate->walLevel;
  pCfg->walCfg.retentionPeriod = 10;
  pCfg->walCfg.retentionSize = 128;
  pCfg->walCfg.rollPeriod = 128;
  pCfg->walCfg.segSize = 128;
  pCfg->walCfg.vgId = pCreate->vgId;
573 574 575
}

static void dndGenerateWrapperCfg(SDnode *pDnode, SCreateVnodeMsg *pCreate, SWrapperCfg *pCfg) {
576
  memcpy(pCfg->db, pCreate->db, TSDB_DB_FNAME_LEN);
577 578 579 580 581
  pCfg->dbUid = pCreate->dbUid;
  pCfg->dropped = 0;
  snprintf(pCfg->path, sizeof(pCfg->path), "%s/vnode%d", pDnode->dir.vnodes, pCreate->vgId);
  pCfg->vgId = pCreate->vgId;
  pCfg->vgVersion = pCreate->vgVersion;
S
Shengliang Guan 已提交
582 583 584 585 586 587 588 589 590 591 592 593 594 595
}

static SDropVnodeMsg *vnodeParseDropVnodeReq(SRpcMsg *rpcMsg) {
  SDropVnodeMsg *pDrop = rpcMsg->pCont;
  pDrop->vgId = htonl(pDrop->vgId);
  return pDrop;
}

static SAuthVnodeMsg *vnodeParseAuthVnodeReq(SRpcMsg *rpcMsg) {
  SAuthVnodeMsg *pAuth = rpcMsg->pCont;
  pAuth->vgId = htonl(pAuth->vgId);
  return pAuth;
}

S
Shengliang Guan 已提交
596
int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
597 598 599
  SCreateVnodeMsg *pCreate = dndParseCreateVnodeReq(rpcMsg);
  dDebug("vgId:%d, create vnode req is received", pCreate->vgId);

S
Shengliang Guan 已提交
600
  SVnodeCfg vnodeCfg = {0};
601
  dndGenerateVnodeCfg(pCreate, &vnodeCfg);
S
Shengliang Guan 已提交
602

603 604
  SWrapperCfg wrapperCfg = {0};
  dndGenerateWrapperCfg(pDnode, pCreate, &wrapperCfg);
S
Shengliang Guan 已提交
605

606
  SVnodeObj *pVnode = dndAcquireVnode(pDnode, pCreate->vgId);
S
Shengliang Guan 已提交
607
  if (pVnode != NULL) {
608
    dDebug("vgId:%d, already exist, return success", pCreate->vgId);
S
Shengliang Guan 已提交
609 610 611 612
    dndReleaseVnode(pDnode, pVnode);
    return 0;
  }

613 614 615 616 617
  SVnode *pImpl = vnodeOpen(wrapperCfg.path, NULL /*pCfg*/);
  if (pImpl == NULL) {
    return -1;
  }

618
  int32_t code = dndOpenVnode(pDnode, &wrapperCfg, pImpl);
619 620 621 622 623 624 625 626 627 628 629 630 631
  if (code != 0) {
    vnodeClose(pImpl);
    vnodeDestroy(wrapperCfg.path);
    terrno = code;
    return code;
  }

  code = dndWriteVnodesToFile(pDnode);
  if (code != 0) {
    vnodeClose(pImpl);
    vnodeDestroy(wrapperCfg.path);
    terrno = code;
    return code;
S
Shengliang Guan 已提交
632 633 634 635 636
  }

  return 0;
}

S
Shengliang Guan 已提交
637
int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
638 639
  SAlterVnodeMsg *pAlter = (SAlterVnodeMsg *)dndParseCreateVnodeReq(rpcMsg);
  dDebug("vgId:%d, alter vnode req is received", pAlter->vgId);
S
Shengliang Guan 已提交
640

641 642
  SVnodeCfg vnodeCfg = {0};
  dndGenerateVnodeCfg(pAlter, &vnodeCfg);
S
Shengliang Guan 已提交
643

644 645 646
  SWrapperCfg wrapperCfg = {0};
  dndGenerateWrapperCfg(pDnode, pAlter, &wrapperCfg);

647
  SVnodeObj *pVnode = dndAcquireVnode(pDnode, pAlter->vgId);
S
Shengliang Guan 已提交
648
  if (pVnode == NULL) {
649
    dDebug("vgId:%d, failed to alter vnode since %s", pAlter->vgId, terrstr());
S
Shengliang Guan 已提交
650 651 652
    return terrno;
  }

653 654 655 656 657 658
  if (wrapperCfg.vgVersion == pVnode->vgVersion) {
    dndReleaseVnode(pDnode, pVnode);
    dDebug("vgId:%d, no need to alter vnode cfg for version unchanged ", pAlter->vgId);
    return 0;
  }

S
Shengliang Guan 已提交
659
  if (vnodeAlter(pVnode->pImpl, &vnodeCfg) != 0) {
660
    dError("vgId:%d, failed to alter vnode since %s", pAlter->vgId, terrstr());
S
Shengliang Guan 已提交
661 662 663 664
    dndReleaseVnode(pDnode, pVnode);
    return terrno;
  }

665 666 667 668 669 670 671
  int32_t oldVersion = pVnode->vgVersion;
  pVnode->vgVersion = wrapperCfg.vgVersion;
  int32_t code = dndWriteVnodesToFile(pDnode);
  if (code != 0) {
    pVnode->vgVersion = oldVersion;
  }

S
Shengliang Guan 已提交
672
  dndReleaseVnode(pDnode, pVnode);
673
  return code;
S
Shengliang Guan 已提交
674 675
}

S
Shengliang Guan 已提交
676
int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
S
Shengliang Guan 已提交
677 678 679 680 681 682 683 684
  SDropVnodeMsg *pDrop = vnodeParseDropVnodeReq(rpcMsg);

  int32_t vgId = pDrop->vgId;
  dDebug("vgId:%d, drop vnode req is received", vgId);

  SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
  if (pVnode == NULL) {
    dDebug("vgId:%d, failed to drop since %s", vgId, terrstr());
S
Shengliang Guan 已提交
685
    return 0;
S
Shengliang Guan 已提交
686 687
  }

688 689 690
  pVnode->dropped = 1;
  if (dndWriteVnodesToFile(pDnode) != 0) {
    pVnode->dropped = 0;
S
Shengliang Guan 已提交
691 692 693
    return terrno;
  }

694 695
  dndReleaseVnode(pDnode, pVnode);
  dndCloseVnode(pDnode, pVnode);
696 697 698 699
  vnodeClose(pVnode->pImpl);
  vnodeDestroy(pVnode->path);
  dndWriteVnodesToFile(pDnode);

S
Shengliang Guan 已提交
700 701 702
  return 0;
}

S
Shengliang Guan 已提交
703
int32_t dndProcessAuthVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
S
Shengliang Guan 已提交
704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720
  SAuthVnodeMsg *pAuth = (SAuthVnodeMsg *)vnodeParseAuthVnodeReq(rpcMsg);

  int32_t code = 0;
  int32_t vgId = pAuth->vgId;
  dDebug("vgId:%d, auth vnode req is received", vgId);

  SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
  if (pVnode == NULL) {
    dDebug("vgId:%d, failed to auth since %s", vgId, terrstr());
    return terrno;
  }

  pVnode->accessState = pAuth->accessState;
  dndReleaseVnode(pDnode, pVnode);
  return 0;
}

S
Shengliang Guan 已提交
721
int32_t dndProcessSyncVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
S
Shengliang Guan 已提交
722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
  SAuthVnodeMsg *pAuth = (SAuthVnodeMsg *)vnodeParseAuthVnodeReq(rpcMsg);

  int32_t vgId = pAuth->vgId;
  dDebug("vgId:%d, auth vnode req is received", vgId);

  SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
  if (pVnode == NULL) {
    dDebug("vgId:%d, failed to auth since %s", vgId, terrstr());
    return terrno;
  }

  if (vnodeSync(pVnode->pImpl) != 0) {
    dError("vgId:%d, failed to auth vnode since %s", vgId, terrstr());
    dndReleaseVnode(pDnode, pVnode);
    return terrno;
  }

  dndReleaseVnode(pDnode, pVnode);
  return 0;
}

S
Shengliang Guan 已提交
743
int32_t dndProcessCompactVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
S
Shengliang Guan 已提交
744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764
  SCompactVnodeMsg *pCompact = (SCompactVnodeMsg *)vnodeParseDropVnodeReq(rpcMsg);

  int32_t vgId = pCompact->vgId;
  dDebug("vgId:%d, compact vnode req is received", vgId);

  SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
  if (pVnode == NULL) {
    dDebug("vgId:%d, failed to compact since %s", vgId, terrstr());
    return terrno;
  }

  if (vnodeCompact(pVnode->pImpl) != 0) {
    dError("vgId:%d, failed to compact vnode since %s", vgId, terrstr());
    dndReleaseVnode(pDnode, pVnode);
    return terrno;
  }

  dndReleaseVnode(pDnode, pVnode);
  return 0;
}

765 766 767
static void dndProcessVnodeQueryQueue(SVnodeObj *pVnode, SRpcMsg *pMsg) {
  SRpcMsg *pRsp = NULL;
  vnodeProcessQueryReq(pVnode->pImpl, pMsg, &pRsp);
S
Shengliang Guan 已提交
768 769
}

770 771 772
static void dndProcessVnodeFetchQueue(SVnodeObj *pVnode, SRpcMsg *pMsg) {
  SRpcMsg *pRsp = NULL;
  vnodeProcessFetchReq(pVnode->pImpl, pMsg, &pRsp);
S
Shengliang Guan 已提交
773 774
}

775
static void dndProcessVnodeWriteQueue(SVnodeObj *pVnode, STaosQall *qall, int32_t numOfMsgs) {
776
  SArray *pArray = taosArrayInit(numOfMsgs, sizeof(SRpcMsg *));
S
Shengliang Guan 已提交
777 778

  for (int32_t i = 0; i < numOfMsgs; ++i) {
779 780 781
    SRpcMsg *pMsg = NULL;
    taosGetQitem(qall, (void **)&pMsg);
    void *ptr = taosArrayPush(pArray, &pMsg);
782
    assert(ptr != NULL);
S
Shengliang Guan 已提交
783 784
  }

H
more  
Hongze Cheng 已提交
785
  vnodeProcessWMsgs(pVnode->pImpl, pArray);
786 787 788 789

  for (size_t i = 0; i < numOfMsgs; i++) {
    SRpcMsg *pRsp = NULL;
    SRpcMsg *pMsg = *(SRpcMsg **)taosArrayGet(pArray, i);
H
more  
Hongze Cheng 已提交
790
    int32_t  code = vnodeApplyWMsg(pVnode->pImpl, pMsg, &pRsp);
791
    if (pRsp != NULL) {
S
Shengliang Guan 已提交
792
      pRsp->ahandle = pMsg->ahandle;
793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808
      rpcSendResponse(pRsp);
      free(pRsp);
    } else {
      if (code != 0) code = terrno;
      SRpcMsg rpcRsp = {.handle = pMsg->handle, .ahandle = pMsg->ahandle, .code = code};
      rpcSendResponse(&rpcRsp);
    }
  }

  for (size_t i = 0; i < numOfMsgs; i++) {
    SRpcMsg *pMsg = *(SRpcMsg **)taosArrayGet(pArray, i);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }

  taosArrayDestroy(pArray);
S
Shengliang Guan 已提交
809 810
}

811
static void dndProcessVnodeApplyQueue(SVnodeObj *pVnode, STaosQall *qall, int32_t numOfMsgs) {
812 813
  SRpcMsg *pMsg = NULL;

S
Shengliang Guan 已提交
814 815
  for (int32_t i = 0; i < numOfMsgs; ++i) {
    taosGetQitem(qall, (void **)&pMsg);
816 817 818

    SRpcMsg *pRsp = NULL;
    (void)vnodeApplyWMsg(pVnode->pImpl, pMsg, &pRsp);
S
Shengliang Guan 已提交
819 820 821
  }
}

822
static void dndProcessVnodeSyncQueue(SVnodeObj *pVnode, STaosQall *qall, int32_t numOfMsgs) {
823 824
  SRpcMsg *pMsg = NULL;

S
Shengliang Guan 已提交
825 826
  for (int32_t i = 0; i < numOfMsgs; ++i) {
    taosGetQitem(qall, (void **)&pMsg);
827 828 829

    SRpcMsg *pRsp = NULL;
    (void)vnodeProcessSyncReq(pVnode->pImpl, pMsg, &pRsp);
S
Shengliang Guan 已提交
830 831 832
  }
}

833
static int32_t dndWriteRpcMsgToVnodeQueue(STaosQueue *pQueue, SRpcMsg *pRpcMsg) {
S
Shengliang Guan 已提交
834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858
  int32_t code = 0;

  if (pQueue == NULL) {
    code = TSDB_CODE_MSG_NOT_PROCESSED;
  } else {
    SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg));
    if (pMsg == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
    } else {
      *pMsg = *pRpcMsg;
      if (taosWriteQitem(pQueue, pMsg) != 0) {
        code = TSDB_CODE_OUT_OF_MEMORY;
      }
    }
  }

  if (code != TSDB_CODE_SUCCESS) {
    SRpcMsg rsp = {.handle = pRpcMsg->handle, .code = code};
    rpcSendResponse(&rsp);
    rpcFreeCont(pRpcMsg->pCont);
  }
}

static SVnodeObj *dndAcquireVnodeFromMsg(SDnode *pDnode, SRpcMsg *pMsg) {
  SMsgHead *pHead = (SMsgHead *)pMsg->pCont;
859
  pHead->contLen = htonl(pHead->contLen);
S
Shengliang Guan 已提交
860 861 862 863
  pHead->vgId = htonl(pHead->vgId);

  SVnodeObj *pVnode = dndAcquireVnode(pDnode, pHead->vgId);
  if (pVnode == NULL) {
864
    SRpcMsg rsp = {.handle = pMsg->handle, .code = TSDB_CODE_VND_INVALID_VGROUP_ID};
S
Shengliang Guan 已提交
865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
  }

  return pVnode;
}

void dndProcessVnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SVnodeObj *pVnode = dndAcquireVnodeFromMsg(pDnode, pMsg);
  if (pVnode != NULL) {
    dndWriteRpcMsgToVnodeQueue(pVnode->pWriteQ, pMsg);
    dndReleaseVnode(pDnode, pVnode);
  }
}

void dndProcessVnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SVnodeObj *pVnode = dndAcquireVnodeFromMsg(pDnode, pMsg);
  if (pVnode != NULL) {
883
    dndWriteRpcMsgToVnodeQueue(pVnode->pSyncQ, pMsg);
S
Shengliang Guan 已提交
884 885 886 887 888 889 890
    dndReleaseVnode(pDnode, pVnode);
  }
}

void dndProcessVnodeQueryMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SVnodeObj *pVnode = dndAcquireVnodeFromMsg(pDnode, pMsg);
  if (pVnode != NULL) {
891
    dndWriteRpcMsgToVnodeQueue(pVnode->pQueryQ, pMsg);
S
Shengliang Guan 已提交
892 893 894 895 896 897 898
    dndReleaseVnode(pDnode, pVnode);
  }
}

void dndProcessVnodeFetchMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SVnodeObj *pVnode = dndAcquireVnodeFromMsg(pDnode, pMsg);
  if (pVnode != NULL) {
899
    dndWriteRpcMsgToVnodeQueue(pVnode->pFetchQ, pMsg);
S
Shengliang Guan 已提交
900 901 902 903
    dndReleaseVnode(pDnode, pVnode);
  }
}

904
static int32_t dndPutMsgIntoVnodeApplyQueue(SDnode *pDnode, int32_t vgId, SRpcMsg *pMsg) {
S
Shengliang Guan 已提交
905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938
  SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
  if (pVnode == NULL) {
    return -1;
  }

  int32_t code = taosWriteQitem(pVnode->pApplyQ, pMsg);
  dndReleaseVnode(pDnode, pVnode);
  return code;
}

static int32_t dndAllocVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  pVnode->pQueryQ = tWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FProcessItem)dndProcessVnodeQueryQueue);
  if (pVnode->pQueryQ == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}

static void dndFreeVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  tWorkerFreeQueue(&pMgmt->queryPool, pVnode->pQueryQ);
  pVnode->pQueryQ = NULL;
}

static int32_t dndAllocVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  pVnode->pFetchQ = tWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FProcessItem)dndProcessVnodeFetchQueue);
  if (pVnode->pFetchQ == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
S
Shengliang Guan 已提交
939

S
Shengliang Guan 已提交
940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
  return 0;
}

static void dndFreeVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  tWorkerFreeQueue(&pMgmt->fetchPool, pVnode->pFetchQ);
  pVnode->pFetchQ = NULL;
}

static int32_t dndInitVnodeReadWorker(SDnode *pDnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;

  int32_t maxFetchThreads = 4;
  float   threadsForQuery = MAX(pDnode->opt.numOfCores * pDnode->opt.ratioOfQueryCores, 1);

  SWorkerPool *pPool = &pMgmt->queryPool;
  pPool->name = "vnode-query";
  pPool->min = (int32_t)threadsForQuery;
  pPool->max = pPool->min;
  if (tWorkerInit(pPool) != 0) {
S
Shengliang Guan 已提交
960 961
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
S
Shengliang Guan 已提交
962 963 964 965 966 967 968
  }

  pPool = &pMgmt->fetchPool;
  pPool->name = "vnode-fetch";
  pPool->min = MIN(maxFetchThreads, pDnode->opt.numOfCores);
  pPool->max = pPool->min;
  if (tWorkerInit(pPool) != 0) {
S
Shengliang Guan 已提交
969 970
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
S
Shengliang Guan 已提交
971 972
  }

973
  dDebug("vnode read worker is initialized");
S
Shengliang Guan 已提交
974 975 976 977 978 979 980
  return 0;
}

static void dndCleanupVnodeReadWorker(SDnode *pDnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  tWorkerCleanup(&pMgmt->fetchPool);
  tWorkerCleanup(&pMgmt->queryPool);
981
  dDebug("vnode close worker is initialized");
S
Shengliang Guan 已提交
982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
}

static int32_t dndAllocVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  pVnode->pWriteQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeWriteQueue);
  if (pVnode->pWriteQ == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}

static void dndFreeVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pWriteQ);
  pVnode->pWriteQ = NULL;
}

static int32_t dndAllocVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  pVnode->pApplyQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeApplyQueue);
  if (pVnode->pApplyQ == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}

static void dndFreeVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pApplyQ);
  pVnode->pApplyQ = NULL;
}

static int32_t dndInitVnodeWriteWorker(SDnode *pDnode) {
1019
  SVnodesMgmt  *pMgmt = &pDnode->vmgmt;
S
Shengliang Guan 已提交
1020 1021
  SMWorkerPool *pPool = &pMgmt->writePool;
  pPool->name = "vnode-write";
1022
  pPool->max = pDnode->opt.numOfCores;
S
Shengliang Guan 已提交
1023
  if (tMWorkerInit(pPool) != 0) {
S
Shengliang Guan 已提交
1024
    terrno = TSDB_CODE_OUT_OF_MEMORY;
S
Shengliang Guan 已提交
1025 1026 1027
    return -1;
  }

1028
  dDebug("vnode write worker is initialized");
S
Shengliang Guan 已提交
1029 1030 1031 1032 1033 1034
  return 0;
}

static void dndCleanupVnodeWriteWorker(SDnode *pDnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  tMWorkerCleanup(&pMgmt->writePool);
1035
  dDebug("vnode write worker is closed");
S
Shengliang Guan 已提交
1036 1037 1038 1039
}

static int32_t dndAllocVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
1040
  pVnode->pSyncQ = tMWorkerAllocQueue(&pMgmt->syncPool, pVnode, (FProcessItems)dndProcessVnodeSyncQueue);
S
Shengliang Guan 已提交
1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
  if (pVnode->pSyncQ == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}

static void dndFreeVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
1051
  tMWorkerFreeQueue(&pMgmt->syncPool, pVnode->pSyncQ);
S
Shengliang Guan 已提交
1052 1053 1054 1055
  pVnode->pSyncQ = NULL;
}

static int32_t dndInitVnodeSyncWorker(SDnode *pDnode) {
1056
  int32_t maxThreads = pDnode->opt.numOfCores / 2;
S
Shengliang Guan 已提交
1057 1058 1059
  if (maxThreads < 1) maxThreads = 1;

  SVnodesMgmt  *pMgmt = &pDnode->vmgmt;
1060
  SMWorkerPool *pPool = &pMgmt->syncPool;
S
Shengliang Guan 已提交
1061 1062 1063
  pPool->name = "vnode-sync";
  pPool->max = maxThreads;
  if (tMWorkerInit(pPool) != 0) {
S
Shengliang Guan 已提交
1064
    terrno = TSDB_CODE_OUT_OF_MEMORY;
S
Shengliang Guan 已提交
1065 1066 1067
    return -1;
  }

1068
  dDebug("vnode sync worker is initialized");
S
Shengliang Guan 已提交
1069 1070 1071 1072 1073 1074
  return 0;
}

static void dndCleanupVnodeSyncWorker(SDnode *pDnode) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;
  tMWorkerCleanup(&pMgmt->syncPool);
1075
  dDebug("vnode sync worker is closed");
S
Shengliang Guan 已提交
1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120
}

int32_t dndInitVnodes(SDnode *pDnode) {
  dInfo("dnode-vnodes start to init");

  if (dndInitVnodeReadWorker(pDnode) != 0) {
    dError("failed to init vnodes read worker since %s", terrstr());
    return -1;
  }

  if (dndInitVnodeWriteWorker(pDnode) != 0) {
    dError("failed to init vnodes write worker since %s", terrstr());
    return -1;
  }

  if (dndInitVnodeSyncWorker(pDnode) != 0) {
    dError("failed to init vnodes sync worker since %s", terrstr());
    return -1;
  }

  if (dndOpenVnodes(pDnode) != 0) {
    dError("failed to open vnodes since %s", terrstr());
    return -1;
  }

  dInfo("dnode-vnodes is initialized");
  return 0;
}

void dndCleanupVnodes(SDnode *pDnode) {
  dInfo("dnode-vnodes start to clean up");
  dndCloseVnodes(pDnode);
  dndCleanupVnodeReadWorker(pDnode);
  dndCleanupVnodeWriteWorker(pDnode);
  dndCleanupVnodeSyncWorker(pDnode);
  dInfo("dnode-vnodes is cleaned up");
}

void dndGetVnodeLoads(SDnode *pDnode, SVnodeLoads *pLoads) {
  SVnodesMgmt *pMgmt = &pDnode->vmgmt;

  taosRLockLatch(&pMgmt->latch);
  pLoads->num = taosHashGetSize(pMgmt->hash);

  int32_t v = 0;
1121
  void   *pIter = taosHashIterate(pMgmt->hash, NULL);
S
Shengliang Guan 已提交
1122 1123 1124 1125
  while (pIter) {
    SVnodeObj **ppVnode = pIter;
    if (ppVnode == NULL || *ppVnode == NULL) continue;

1126
    SVnodeObj  *pVnode = *ppVnode;
S
Shengliang Guan 已提交
1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140
    SVnodeLoad *pLoad = &pLoads->data[v++];

    vnodeGetLoad(pVnode->pImpl, pLoad);
    pLoad->vgId = htonl(pLoad->vgId);
    pLoad->totalStorage = htobe64(pLoad->totalStorage);
    pLoad->compStorage = htobe64(pLoad->compStorage);
    pLoad->pointsWritten = htobe64(pLoad->pointsWritten);
    pLoad->tablesNum = htobe64(pLoad->tablesNum);

    pIter = taosHashIterate(pMgmt->hash, pIter);
  }

  taosRUnLockLatch(&pMgmt->latch);
}