vmInt.c 11.2 KB
Newer Older
S
shm  
Shengliang Guan 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
S
shm  
Shengliang Guan 已提交
17
#include "vmInt.h"
S
shm  
Shengliang Guan 已提交
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48

/*
 * Look up the vnode stored under vgId and take one reference on it.
 *
 * The hash lookup and the refCount increment are both performed while the
 * manager's read latch is held.
 *
 * Returns the vnode object, or NULL with terrno set to
 * TSDB_CODE_VND_INVALID_VGROUP_ID when the lookup yields nothing.
 */
SVnodeObj *vmAcquireVnode(SVnodesMgmt *pMgmt, int32_t vgId) {
  SVnodeObj *pFound = NULL;
  int32_t    refs = 0;

  taosRLockLatch(&pMgmt->latch);
  taosHashGetDup(pMgmt->hash, &vgId, sizeof(int32_t), (void *)&pFound);
  if (pFound != NULL) {
    refs = atomic_add_fetch_32(&pFound->refCount, 1);
  } else {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
  }
  taosRUnLockLatch(&pMgmt->latch);

  if (pFound == NULL) {
    return NULL;
  }

  // Trace outside the latch; refs is the value observed right after the increment.
  dTrace("vgId:%d, acquire vnode, refCount:%d", pFound->vgId, refs);
  return pFound;
}

/*
 * Drop one reference on pVnode. A NULL pVnode is ignored.
 *
 * The refCount decrement is performed while the manager's read latch is held;
 * the resulting count is only traced, nothing is freed here.
 */
void vmReleaseVnode(SVnodesMgmt *pMgmt, SVnodeObj *pVnode) {
  if (pVnode != NULL) {
    taosRLockLatch(&pMgmt->latch);
    int32_t remaining = atomic_sub_fetch_32(&pVnode->refCount, 1);
    taosRUnLockLatch(&pMgmt->latch);

    dTrace("vgId:%d, release vnode, refCount:%d", pVnode->vgId, remaining);
  }
}

/*
 * Wrap an already-opened vnode implementation in a SVnodeObj and register it
 * in the manager's hash under its vgId.
 *
 * pMgmt  vnode manager that owns the hash, latch and queues
 * pCfg   supplies vgId, vgVersion and path for the new object
 * pImpl  opened vnode implementation; stored in the object, not closed here
 *
 * Returns the result of taosHashPut() on the registration path, or -1 with
 * terrno = TSDB_CODE_OUT_OF_MEMORY when an allocation or the queue setup
 * fails. On those early failures everything allocated by this function is
 * released again; pImpl is left untouched for the caller.
 */
int32_t vmOpenVnode(SVnodesMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) {
  SVnodeObj *pVnode = taosMemoryCalloc(1, sizeof(SVnodeObj));
  if (pVnode == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  pVnode->vgId = pCfg->vgId;
  pVnode->refCount = 0;
  pVnode->vgVersion = pCfg->vgVersion;
  pVnode->dropped = 0;
  pVnode->accessState = TSDB_VN_ALL_ACCCESS;
  pVnode->path = tstrdup(pCfg->path);
  pVnode->pImpl = pImpl;
  pVnode->pWrapper = pMgmt->pWrapper;

  if (pVnode->path == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    // The object was never published, so it can simply be freed.
    taosMemoryFree(pVnode);
    return -1;
  }

  if (vmAllocQueue(pMgmt, pVnode) != 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    taosMemoryFree(pVnode->path);
    taosMemoryFree(pVnode);
    return -1;
  }

  // Publish the object; the hash stores the pointer value, keyed by vgId.
  // NOTE(review): if taosHashPut() fails the object and its queues stay
  // allocated but unreachable - confirm the intended cleanup with the hash API.
  taosWLockLatch(&pMgmt->latch);
  int32_t code = taosHashPut(pMgmt->hash, &pVnode->vgId, sizeof(int32_t), &pVnode, sizeof(SVnodeObj *));
  taosWUnLockLatch(&pMgmt->latch);

  return code;
}

/*
 * Unregister a vnode, wait for it to become idle, and destroy the wrapper.
 *
 * Steps, in order:
 *   1. remove the entry from the manager's hash (under the write latch);
 *   2. drop one reference via vmReleaseVnode()
 *      (presumably the one the caller holds - verify against callers);
 *   3. poll every 10 ms until refCount reaches 0 and each queue is empty;
 *   4. free the queues and close the vnode implementation;
 *   5. if the vnode is marked dropped, destroy its "vnode<sep>vnode<vgId>" data;
 *   6. free the path string and the object itself.
 *
 * pVnode must not be used after this call returns.
 */
void vmCloseVnode(SVnodesMgmt *pMgmt, SVnodeObj *pVnode) {
  taosWLockLatch(&pMgmt->latch);
  taosHashRemove(pMgmt->hash, &pVnode->vgId, sizeof(int32_t));
  taosWUnLockLatch(&pMgmt->latch);

  vmReleaseVnode(pMgmt, pVnode);

  // Wait for outstanding references first, then drain each queue in turn.
  while (pVnode->refCount > 0) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pVnode->pWriteQ)) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pVnode->pSyncQ)) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pVnode->pApplyQ)) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pVnode->pQueryQ)) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pVnode->pFetchQ)) {
    taosMsleep(10);
  }
  while (!taosQueueEmpty(pVnode->pMergeQ)) {
    taosMsleep(10);
  }

  vmFreeQueue(pMgmt, pVnode);
  vnodeClose(pVnode->pImpl);
  pVnode->pImpl = NULL;

  dDebug("vgId:%d, vnode is closed", pVnode->vgId);

  if (pVnode->dropped) {
    char dataPath[TSDB_FILENAME_LEN] = {0};

    dDebug("vgId:%d, vnode is destroyed for dropped:%d", pVnode->vgId, pVnode->dropped);
    snprintf(dataPath, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, pVnode->vgId);
    vnodeDestroy(dataPath, pMgmt->pTfs);
  }

  taosMemoryFree(pVnode->path);
  taosMemoryFree(pVnode);
}

S
Shengliang Guan 已提交
113
static void *vmOpenVnodeInThread(void *param) {
S
shm  
Shengliang Guan 已提交
114 115 116
  SVnodeThread *pThread = param;
  SVnodesMgmt  *pMgmt = pThread->pMgmt;
  SDnode       *pDnode = pMgmt->pDnode;
H
Hongze Cheng 已提交
117
  char          path[TSDB_FILENAME_LEN];
S
shm  
Shengliang Guan 已提交
118 119 120 121 122 123 124 125 126 127

  dDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum);
  setThreadName("open-vnodes");

  for (int32_t v = 0; v < pThread->vnodeNum; ++v) {
    SWrapperCfg *pCfg = &pThread->pCfgs[v];

    char stepDesc[TSDB_STEP_DESC_LEN] = {0};
    snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to restore, %d of %d have been opened", pCfg->vgId,
             pMgmt->state.openVnodes, pMgmt->state.totalVnodes);
S
Shengliang Guan 已提交
128
    dmReportStartup(pDnode, "vnode-open", stepDesc);
S
shm  
Shengliang Guan 已提交
129

S
Shengliang Guan 已提交
130
    SMsgCb msgCb = pMgmt->pDnode->data.msgCb;
S
Shengliang Guan 已提交
131
    msgCb.pWrapper = pMgmt->pWrapper;
L
Liu Jicong 已提交
132
    msgCb.queueFps[WRITE_QUEUE] = vmPutMsgToWriteQueue;
S
Shengliang Guan 已提交
133 134
    msgCb.queueFps[SYNC_QUEUE] = vmPutMsgToSyncQueue;
    msgCb.queueFps[APPLY_QUEUE] = vmPutMsgToApplyQueue;
S
Shengliang Guan 已提交
135
    msgCb.queueFps[QUERY_QUEUE] = vmPutMsgToQueryQueue;
S
Shengliang Guan 已提交
136
    msgCb.queueFps[FETCH_QUEUE] = vmPutMsgToFetchQueue;
S
Shengliang Guan 已提交
137
    msgCb.queueFps[MERGE_QUEUE] = vmPutMsgToMergeQueue;
S
Shengliang Guan 已提交
138
    msgCb.qsizeFp = vmGetQueueSize;
H
Hongze Cheng 已提交
139 140
    snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, pCfg->vgId);
    SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, msgCb);
S
shm  
Shengliang Guan 已提交
141 142 143 144
    if (pImpl == NULL) {
      dError("vgId:%d, failed to open vnode by thread:%d", pCfg->vgId, pThread->threadIndex);
      pThread->failed++;
    } else {
S
shm  
Shengliang Guan 已提交
145
      vmOpenVnode(pMgmt, pCfg, pImpl);
S
shm  
Shengliang Guan 已提交
146 147
      dDebug("vgId:%d, is opened by thread:%d", pCfg->vgId, pThread->threadIndex);
      pThread->opened++;
S
Shengliang Guan 已提交
148
      atomic_add_fetch_32(&pMgmt->state.openVnodes, 1);
S
shm  
Shengliang Guan 已提交
149 150 151 152 153 154 155 156 157 158 159 160 161 162
    }
  }

  dDebug("thread:%d, total vnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened,
         pThread->failed);
  return NULL;
}

/*
 * Create the vnode hash, read the vnode list from file and open every vnode
 * on worker threads.
 *
 * The configs are dealt round-robin to threadNum workers (currently 1); each
 * worker runs vmOpenVnodeInThread() and is joined before this function
 * returns.
 *
 * Returns 0 when state.openVnodes equals state.totalVnodes afterwards,
 * -1 otherwise (including hash creation, list loading or allocation failure).
 */
static int32_t vmOpenVnodes(SVnodesMgmt *pMgmt) {
  pMgmt->hash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  if (pMgmt->hash == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    dError("failed to init vnode hash since %s", terrstr());
    return -1;
  }

  SWrapperCfg *pCfgs = NULL;
  int32_t      numOfVnodes = 0;
  if (vmGetVnodeListFromFile(pMgmt, &pCfgs, &numOfVnodes) != 0) {
    dInfo("failed to get vnode list from disk since %s", terrstr());
    return -1;
  }

  pMgmt->state.totalVnodes = numOfVnodes;

  int32_t threadNum = 1;  // tsNumOfCores;
  // +1 so every per-thread array is non-empty and can hold the remainder.
  int32_t vnodesPerThread = numOfVnodes / threadNum + 1;

  SVnodeThread *threads = taosMemoryCalloc(threadNum, sizeof(SVnodeThread));
  if (threads == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    dError("failed to allocate vnode open threads since %s", terrstr());
    taosMemoryFree(pCfgs);
    return -1;
  }

  for (int32_t t = 0; t < threadNum; ++t) {
    threads[t].threadIndex = t;
    threads[t].pMgmt = pMgmt;
    threads[t].pCfgs = taosMemoryCalloc(vnodesPerThread, sizeof(SWrapperCfg));
    if (threads[t].pCfgs == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      dError("failed to allocate vnode cfgs for thread:%d since %s", t, terrstr());
      // Undo the per-thread arrays allocated so far before bailing out.
      for (int32_t u = 0; u < t; ++u) {
        taosMemoryFree(threads[u].pCfgs);
      }
      taosMemoryFree(threads);
      taosMemoryFree(pCfgs);
      return -1;
    }
  }

  // Deal the configs to the workers round-robin.
  for (int32_t v = 0; v < numOfVnodes; ++v) {
    int32_t       t = v % threadNum;
    SVnodeThread *pThread = &threads[t];
    pThread->pCfgs[pThread->vnodeNum++] = pCfgs[v];
  }

  dInfo("start %d threads to open %d vnodes", threadNum, numOfVnodes);

  for (int32_t t = 0; t < threadNum; ++t) {
    SVnodeThread *pThread = &threads[t];
    if (pThread->vnodeNum == 0) continue;

    TdThreadAttr thAttr;
    taosThreadAttrInit(&thAttr);
    taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
    if (taosThreadCreate(&pThread->thread, &thAttr, vmOpenVnodeInThread, pThread) != 0) {
      dError("thread:%d, failed to create thread to open vnode, reason:%s", pThread->threadIndex, strerror(errno));
    }

    taosThreadAttrDestroy(&thAttr);
  }

  for (int32_t t = 0; t < threadNum; ++t) {
    SVnodeThread *pThread = &threads[t];
    if (pThread->vnodeNum > 0 && taosCheckPthreadValid(pThread->thread)) {
      taosThreadJoin(pThread->thread, NULL);
    }
    taosMemoryFree(pThread->pCfgs);
  }
  taosMemoryFree(threads);
  taosMemoryFree(pCfgs);

  if (pMgmt->state.openVnodes != pMgmt->state.totalVnodes) {
    dError("there are total vnodes:%d, opened:%d", pMgmt->state.totalVnodes, pMgmt->state.openVnodes);
    return -1;
  } else {
    dInfo("total vnodes:%d open successfully", pMgmt->state.totalVnodes);
    return 0;
  }
}

/*
 * Close every vnode currently listed by vmGetVnodeListFromHash(), then
 * release the list and tear down the manager's hash table.
 */
static void vmCloseVnodes(SVnodesMgmt *pMgmt) {
  dInfo("start to close all vnodes");

  int32_t     total = 0;
  SVnodeObj **ppList = vmGetVnodeListFromHash(pMgmt, &total);

  int32_t idx = 0;
  while (idx < total) {
    vmCloseVnode(pMgmt, ppList[idx]);
    ++idx;
  }

  if (ppList != NULL) {
    taosMemoryFree(ppList);
  }

  if (pMgmt->hash != NULL) {
    taosHashCleanup(pMgmt->hash);
    pMgmt->hash = NULL;
  }

  dInfo("total vnodes:%d are all closed", total);
}

/*
 * Close callback for the vnode management node.
 *
 * Does nothing when the wrapper has no manager attached. Otherwise closes all
 * vnodes, stops the workers, cleans up the vnode module, closes the tfs
 * handle, frees the manager and clears pWrapper->pMgmt.
 */
static void vmCleanup(SMgmtWrapper *pWrapper) {
  SVnodesMgmt *pMgmt = pWrapper->pMgmt;

  if (pMgmt != NULL) {
    dInfo("vnode-mgmt start to cleanup");

    vmCloseVnodes(pMgmt);
    vmStopWorker(pMgmt);
    vnodeCleanup();
    tfsClose(pMgmt->pTfs);

    taosMemoryFree(pMgmt);
    pWrapper->pMgmt = NULL;

    dInfo("vnode-mgmt is cleaned up");
  }
}
S
shm  
Shengliang Guan 已提交
263

S
shm  
Shengliang Guan 已提交
264
/*
 * Open callback for the vnode management node.
 *
 * Allocates the manager, opens tfs (falling back to a single primary disk
 * built from the dnode's dataDir when no disks are configured), initializes
 * wal, sync and the vnode module, starts the workers and opens all vnodes.
 * A udfcOpen() failure is only logged and does not fail initialization.
 *
 * Returns 0 on success with pWrapper->pMgmt set to the new manager, or -1 on
 * any failure. Every failure goes through the _OVER label so the failure is
 * logged and vmCleanup() is invoked consistently.
 */
static int32_t vmInit(SMgmtWrapper *pWrapper) {
  SDnode      *pDnode = pWrapper->pDnode;
  SVnodesMgmt *pMgmt = taosMemoryCalloc(1, sizeof(SVnodesMgmt));
  int32_t      code = -1;

  dInfo("vnode-mgmt start to init");
  if (pMgmt == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto _OVER;
  }

  pMgmt->path = pWrapper->path;
  pMgmt->pDnode = pWrapper->pDnode;
  pMgmt->pWrapper = pWrapper;
  taosInitRWLatch(&pMgmt->latch);

  // Default disk used only when the dnode has no disk list configured.
  SDiskCfg dCfg = {0};
  tstrncpy(dCfg.dir, pDnode->data.dataDir, TSDB_FILENAME_LEN);
  dCfg.level = 0;
  dCfg.primary = 1;
  SDiskCfg *pDisks = pDnode->data.disks;
  int32_t   numOfDisks = pDnode->data.numOfDisks;
  if (numOfDisks <= 0 || pDisks == NULL) {
    pDisks = &dCfg;
    numOfDisks = 1;
  }

  pMgmt->pTfs = tfsOpen(pDisks, numOfDisks);
  if (pMgmt->pTfs == NULL) {
    dError("failed to init tfs since %s", terrstr());
    goto _OVER;
  }
  dmReportStartup(pDnode, "vnode-tfs", "initialized");

  if (walInit() != 0) {
    dError("failed to init wal since %s", terrstr());
    goto _OVER;
  }
  dmReportStartup(pDnode, "vnode-wal", "initialized");

  if (syncInit() != 0) {
    dError("failed to open sync since %s", terrstr());
    goto _OVER;
  }

  if (vnodeInit(tsNumOfCommitThreads) != 0) {
    dError("failed to init vnode since %s", terrstr());
    goto _OVER;
  }
  dmReportStartup(pDnode, "vnode-commit", "initialized");

  if (vmStartWorker(pMgmt) != 0) {
    dError("failed to init workers since %s", terrstr());
    goto _OVER;
  }
  dmReportStartup(pDnode, "vnode-worker", "initialized");

  if (vmOpenVnodes(pMgmt) != 0) {
    dError("failed to open vnode since %s", terrstr());
    goto _OVER;
  }
  dmReportStartup(pDnode, "vnode-vnodes", "initialized");

  if (udfcOpen() != 0) {
    dError("failed to open udfc in vnode");
  }

  code = 0;

_OVER:
  if (code == 0) {
    pWrapper->pMgmt = pMgmt;
    dInfo("vnodes-mgmt is initialized");
  } else {
    dError("failed to init vnodes-mgmt since %s", terrstr());
    // NOTE(review): vmCleanup() works on pWrapper->pMgmt, which this function
    // only assigns on success, so on this path it may find nothing to release.
    // Confirm whether pMgmt should be published (or freed) before this call.
    vmCleanup(pWrapper);
  }

  return code;
}

S
shm  
Shengliang Guan 已提交
341
/*
 * Required callback: sets *required to true when the dnode's supportVnodes
 * setting is greater than zero. Always returns 0.
 */
static int32_t vmRequire(SMgmtWrapper *pWrapper, bool *required) {
  *required = (pWrapper->pDnode->data.supportVnodes > 0);
  return 0;
}
S
shm  
Shengliang Guan 已提交
346

347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
/*
 * Start callback: call vnodeStart() on every vnode registered in the hash.
 *
 * The whole walk runs under the manager's read latch. Entries whose stored
 * pointer is NULL are skipped. Always returns 0.
 */
static int32_t vmStart(SMgmtWrapper *pWrapper) {
  dDebug("vnode-mgmt start to run");
  SVnodesMgmt *pMgmt = pWrapper->pMgmt;

  taosRLockLatch(&pMgmt->latch);

  // The iterator is advanced in the for-header so that skipping an entry can
  // never leave it stuck on the same element.
  for (void *pIter = taosHashIterate(pMgmt->hash, NULL); pIter != NULL; pIter = taosHashIterate(pMgmt->hash, pIter)) {
    SVnodeObj *pVnode = *(SVnodeObj **)pIter;
    if (pVnode == NULL) continue;

    vnodeStart(pVnode->pImpl);
  }

  taosRUnLockLatch(&pMgmt->latch);
  return 0;
}

/*
 * Stop callback. Intentionally does nothing here: per the original note,
 * stopping is processed inside the vnode.
 */
static void vmStop(SMgmtWrapper *pWrapper) {
  (void)pWrapper;  // process inside the vnode
}

S
Shengliang Guan 已提交
371
/*
 * Install the vnode management callbacks on pWrapper.
 *
 * Registers the message handlers first, then sets the wrapper's name to
 * "vnode" and stores the open/close/start/stop/required function table.
 */
void vmSetMgmtFp(SMgmtWrapper *pWrapper) {
  // Members not named here are zero-initialized.
  SMgmtFp mgmtFp = {
      .openFp = vmInit,
      .closeFp = vmCleanup,
      .startFp = vmStart,
      .stopFp = vmStop,
      .requiredFp = vmRequire,
  };

  vmInitMsgHandle(pWrapper);
  pWrapper->name = "vnode";
  pWrapper->fp = mgmtFp;
}
S
shm  
Shengliang Guan 已提交
383