mnodeSdb.c 31.9 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

S
slguan 已提交
16
#define _DEFAULT_SOURCE
17
#include "os.h"
S
slguan 已提交
18
#include "taoserror.h"
19
#include "hash.h"
S
slguan 已提交
20 21
#include "tutil.h"
#include "tbalance.h"
S
slguan 已提交
22
#include "tqueue.h"
S
slguan 已提交
23
#include "twal.h"
S
slguan 已提交
24
#include "tsync.h"
S
slguan 已提交
25
#include "tglobal.h"
S
slguan 已提交
26
#include "dnode.h"
27
#include "mnode.h"
S
Shengliang Guan 已提交
28 29 30 31
#include "mnodeDef.h"
#include "mnodeInt.h"
#include "mnodeMnode.h"
#include "mnodeDnode.h"
S
Shengliang Guan 已提交
32
#include "mnodeCluster.h"
S
Shengliang Guan 已提交
33
#include "mnodeSdb.h"
H
hzcheng 已提交
34

35
#define SDB_TABLE_LEN 12
36
#define SDB_SYNC_HACK 16
37

S
slguan 已提交
38 39 40 41 42 43 44 45 46
typedef enum {
  SDB_ACTION_INSERT,
  SDB_ACTION_DELETE,
  SDB_ACTION_UPDATE
} ESdbAction;

typedef enum {
  SDB_STATUS_OFFLINE,
  SDB_STATUS_SERVING,
S
slguan 已提交
47
  SDB_STATUS_CLOSING
S
slguan 已提交
48 49
} ESdbStatus;

S
slguan 已提交
50
typedef struct _SSdbTable {
51
  char      tableName[SDB_TABLE_LEN];
S
slguan 已提交
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
  ESdbTable tableId;
  ESdbKey   keyType;
  int32_t   hashSessions;
  int32_t   maxRowSize;
  int32_t   refCountPos;
  int32_t   autoIndex;
  int64_t   numOfRows;
  void *    iHandle;
  int32_t (*insertFp)(SSdbOper *pDesc);
  int32_t (*deleteFp)(SSdbOper *pOper);
  int32_t (*updateFp)(SSdbOper *pOper);
  int32_t (*decodeFp)(SSdbOper *pOper);
  int32_t (*encodeFp)(SSdbOper *pOper);
  int32_t (*destroyFp)(SSdbOper *pOper);
  int32_t (*restoredFp)();
S
slguan 已提交
67 68
} SSdbTable;

S
slguan 已提交
69 70 71 72 73 74 75 76 77 78 79 80
typedef struct {
  ESyncRole  role;
  ESdbStatus status;
  int64_t    version;
  void *     sync;
  void *     wal;
  SSyncCfg   cfg;
  int32_t    numOfTables;
  SSdbTable *tableList[SDB_TABLE_MAX];
  pthread_mutex_t mutex;
} SSdbObject;

81 82 83 84 85 86 87 88 89 90
typedef struct {
  pthread_t thread;
  int32_t   workerId;
} SSdbWriteWorker;

typedef struct {
  int32_t num;
  SSdbWriteWorker *writeWorker;
} SSdbWriteWorkerPool;

S
slguan 已提交
91
static SSdbObject tsSdbObj = {0};
92 93 94 95 96 97 98 99 100 101 102 103
static taos_qset  tsSdbWriteQset;
static taos_qall  tsSdbWriteQall;
static taos_queue tsSdbWriteQueue;
static SSdbWriteWorkerPool tsSdbPool;

static int     sdbWrite(void *param, void *data, int type);
static int     sdbWriteToQueue(void *param, void *data, int type);
static void *  sdbWorkerFp(void *param);
static int32_t sdbInitWriteWorker();
static void    sdbCleanupWriteWorker();
static int32_t sdbAllocWriteQueue();
static void    sdbFreeWritequeue();
S
Shengliang Guan 已提交
104 105 106 107 108
static int32_t sdbUpdateRowImp(SSdbOper *pOper);
static int32_t sdbDeleteRowImp(SSdbOper *pOper);
static int32_t sdbInsertHash(SSdbTable *pTable, SSdbOper *pOper);
static int32_t sdbUpdateHash(SSdbTable *pTable, SSdbOper *pOper);
static int32_t sdbDeleteHash(SSdbTable *pTable, SSdbOper *pOper);
S
slguan 已提交
109

S
slguan 已提交
110 111 112 113 114 115 116
int32_t sdbGetId(void *handle) {
  return ((SSdbTable *)handle)->autoIndex;
}

int64_t sdbGetNumOfRows(void *handle) {
  return ((SSdbTable *)handle)->numOfRows;
}
S
slguan 已提交
117

S
slguan 已提交
118
uint64_t sdbGetVersion() {
S
slguan 已提交
119 120 121 122 123
  return tsSdbObj.version;
}

bool sdbIsMaster() { 
  return tsSdbObj.role == TAOS_SYNC_ROLE_MASTER; 
S
slguan 已提交
124 125
}

S
slguan 已提交
126 127 128 129
bool sdbIsServing() {
  return tsSdbObj.status == SDB_STATUS_SERVING; 
}

130 131 132 133 134 135 136 137
static void *sdbGetObjKey(SSdbTable *pTable, void *key) {
  if (pTable->keyType == SDB_KEY_VAR_STRING) {
    return *(char **)key;
  }

  return key;
}

S
slguan 已提交
138 139 140 141 142 143 144 145 146 147 148
static char *sdbGetActionStr(int32_t action) {
  switch (action) {
    case SDB_ACTION_INSERT:
      return "insert";
    case SDB_ACTION_DELETE:
      return "delete";
    case SDB_ACTION_UPDATE:
      return "update";
  }
  return "invalid";
}
S
slguan 已提交
149

150
static char *sdbGetKeyStr(SSdbTable *pTable, void *key) {
S
slguan 已提交
151 152
  static char str[16];
  switch (pTable->keyType) {
S
slguan 已提交
153
    case SDB_KEY_STRING:
154 155
    case SDB_KEY_VAR_STRING:
      return (char *)key;
S
slguan 已提交
156
    case SDB_KEY_INT:
S
slguan 已提交
157
    case SDB_KEY_AUTO:
158
      sprintf(str, "%d", *(int32_t *)key);
S
slguan 已提交
159 160
      return str;
    default:
S
slguan 已提交
161
      return "invalid";
S
slguan 已提交
162 163 164
  }
}

165 166 167 168
static char *sdbGetKeyStrFromObj(SSdbTable *pTable, void *key) {
  return sdbGetKeyStr(pTable, sdbGetObjKey(pTable, key));
}

S
slguan 已提交
169
static void *sdbGetTableFromId(int32_t tableId) {
S
slguan 已提交
170
  return tsSdbObj.tableList[tableId];
S
slguan 已提交
171 172
}

S
slguan 已提交
173
static int32_t sdbInitWal() {
陶建辉(Jeff)'s avatar
陶建辉(Jeff) 已提交
174
  SWalCfg walCfg = {.walLevel = 2, .wals = 2, .keep = 1, .fsyncPeriod = 0};
S
slguan 已提交
175 176 177
  char temp[TSDB_FILENAME_LEN];
  sprintf(temp, "%s/wal", tsMnodeDir);
  tsSdbObj.wal = walOpen(temp, &walCfg);
S
slguan 已提交
178 179
  if (tsSdbObj.wal == NULL) {
    sdbError("failed to open sdb wal in %s", tsMnodeDir);
H
hzcheng 已提交
180 181 182
    return -1;
  }

S
Shengliang Guan 已提交
183
  sdbInfo("open sdb wal for restore");
S
slguan 已提交
184
  walRestore(tsSdbObj.wal, NULL, sdbWrite);
S
slguan 已提交
185 186
  return 0;
}
H
hzcheng 已提交
187

S
slguan 已提交
188
static void sdbRestoreTables() {
S
slguan 已提交
189 190
  int32_t totalRows = 0;
  int32_t numOfTables = 0;
S
slguan 已提交
191
  for (int32_t tableId = 0; tableId < SDB_TABLE_MAX; ++tableId) {
S
slguan 已提交
192 193
    SSdbTable *pTable = sdbGetTableFromId(tableId);
    if (pTable == NULL) continue;
S
slguan 已提交
194 195
    if (pTable->restoredFp) {
      (*pTable->restoredFp)();
H
hzcheng 已提交
196 197
    }

S
slguan 已提交
198 199
    totalRows += pTable->numOfRows;
    numOfTables++;
200
    sdbDebug("table:%s, is restored, numOfRows:%" PRId64, pTable->tableName, pTable->numOfRows);
S
slguan 已提交
201 202
  }

S
log  
Shengliang Guan 已提交
203
  sdbInfo("sdb is restored, ver:%" PRId64 " totalRows:%d numOfTables:%d", tsSdbObj.version, totalRows, numOfTables);
S
slguan 已提交
204 205 206 207 208 209 210 211
}

void sdbUpdateMnodeRoles() {
  if (tsSdbObj.sync == NULL) return;

  SNodesRole roles = {0};
  syncGetNodesRole(tsSdbObj.sync, &roles);

212
  sdbInfo("update mnodes sync roles, total:%d", tsSdbObj.cfg.replica);
S
slguan 已提交
213
  for (int32_t i = 0; i < tsSdbObj.cfg.replica; ++i) {
214
    SMnodeObj *pMnode = mnodeGetMnode(roles.nodeId[i]);
S
slguan 已提交
215 216
    if (pMnode != NULL) {
      pMnode->role = roles.role[i];
217
      sdbInfo("mnode:%d, role:%s", pMnode->mnodeId, mnodeGetMnodeRoleStr(pMnode->role));
218
      if (pMnode->mnodeId == dnodeGetDnodeId()) tsSdbObj.role = pMnode->role;
219
      mnodeDecMnodeRef(pMnode);
S
slguan 已提交
220 221
    }
  }
222

S
Shengliang Guan 已提交
223
  mnodeUpdateClusterId();
224
  mnodeUpdateMnodeEpSet();
S
slguan 已提交
225 226
}

227
static uint32_t sdbGetFileInfo(void *ahandle, char *name, uint32_t *index, uint32_t eindex, int64_t *size, uint64_t *fversion) {
S
slguan 已提交
228 229 230 231 232
  sdbUpdateMnodeRoles();
  return 0;
}

static int sdbGetWalInfo(void *ahandle, char *name, uint32_t *index) {
S
slguan 已提交
233
  return walGetWalFile(tsSdbObj.wal, name, index);
S
slguan 已提交
234 235 236
}

static void sdbNotifyRole(void *ahandle, int8_t role) {
237
  sdbInfo("mnode role changed from %s to %s", mnodeGetMnodeRoleStr(tsSdbObj.role), mnodeGetMnodeRoleStr(role));
S
slguan 已提交
238 239 240 241 242 243 244 245 246

  if (role == TAOS_SYNC_ROLE_MASTER && tsSdbObj.role != TAOS_SYNC_ROLE_MASTER) {
    balanceReset();
  }
  tsSdbObj.role = role;

  sdbUpdateMnodeRoles();
}

247
FORCE_INLINE
S
slguan 已提交
248
static void sdbConfirmForward(void *ahandle, void *param, int32_t code) {
S
Shengliang Guan 已提交
249 250 251
  assert(param);
  SSdbOper * pOper = param;
  SMnodeMsg *pMsg = pOper->pMsg;
252
  if (code <= 0) pOper->retCode = code;
S
Shengliang Guan 已提交
253

254 255
  int32_t processedCount = atomic_add_fetch_32(&pOper->processedCount, 1);
  if (processedCount <= 1) {
S
Shengliang Guan 已提交
256
    if (pMsg != NULL) {
S
Shengliang Guan 已提交
257 258
      sdbDebug("app:%p:%p, waiting for confirm this operation, count:%d result:%s", pMsg->rpcMsg.ahandle, pMsg,
               processedCount, tstrerror(code));
S
Shengliang Guan 已提交
259 260 261 262 263
    }
    return;
  }

  if (pMsg != NULL) {
S
Shengliang Guan 已提交
264 265
    sdbDebug("app:%p:%p, is confirmed and will do callback func, result:%s", pMsg->rpcMsg.ahandle, pMsg,
             tstrerror(code));
S
Shengliang Guan 已提交
266 267
  }

S
Shengliang Guan 已提交
268 269 270 271 272 273 274 275 276 277 278 279 280 281
  // failed to forward, need revert insert
  if (pOper->retCode != TSDB_CODE_SUCCESS) {
    SWalHead *pHead = (void *)pOper + sizeof(SSdbOper) + SDB_SYNC_HACK;
    int32_t   action = pHead->msgType % 10;
    sdbError("table:%s record:%p:%s ver:%" PRIu64 ", action:%d failed to foward reason:%s",
             ((SSdbTable *)pOper->table)->tableName, pOper->pObj, sdbGetKeyStr(pOper->table, pHead->cont),
             pHead->version, action, tstrerror(pOper->retCode));
    if (action == SDB_ACTION_INSERT) {
      sdbDeleteHash(pOper->table, pOper);
    }
  }

  if (pOper->writeCb != NULL) {
    pOper->retCode = (*pOper->writeCb)(pMsg, pOper->retCode);
282
  }
S
Shengliang Guan 已提交
283
  dnodeSendRpcMnodeWriteRsp(pMsg, pOper->retCode);
284 285 286 287 288

  // if ahandle, means this func is called by sdb write
  if (ahandle == NULL) {
    sdbDecRef(pOper->table, pOper->pObj);
  }
S
Shengliang Guan 已提交
289

S
Shengliang Guan 已提交
290
  taosFreeQitem(pOper);
S
slguan 已提交
291 292 293
}

void sdbUpdateSync() {
294 295 296 297 298
  if (!mnodeIsRunning()) {
    mDebug("mnode not start yet, update sync info later");
    return;
  }

S
slguan 已提交
299
  SSyncCfg syncCfg = {0};
300
  int32_t  index = 0;
S
slguan 已提交
301

S
slguan 已提交
302
  SDMMnodeInfos *mnodes = dnodeGetMnodeInfos();
S
slguan 已提交
303
  for (int32_t i = 0; i < mnodes->nodeNum; ++i) {
S
slguan 已提交
304
    SDMMnodeInfo *node = &mnodes->nodeInfos[i];
S
slguan 已提交
305
    syncCfg.nodeInfo[i].nodeId = node->nodeId;
J
jtao1735 已提交
306 307
    taosGetFqdnPortFromEp(node->nodeEp, syncCfg.nodeInfo[i].nodeFqdn, &syncCfg.nodeInfo[i].nodePort);
    syncCfg.nodeInfo[i].nodePort += TSDB_PORT_SYNC;
S
slguan 已提交
308 309 310 311
    index++;
  }

  if (index == 0) {
S
Shengliang Guan 已提交
312
    void *pIter = NULL;
S
slguan 已提交
313 314
    while (1) {
      SMnodeObj *pMnode = NULL;
315
      pIter = mnodeGetNextMnode(pIter, &pMnode);
S
slguan 已提交
316 317 318 319
      if (pMnode == NULL) break;

      syncCfg.nodeInfo[index].nodeId = pMnode->mnodeId;

320
      SDnodeObj *pDnode = mnodeGetDnode(pMnode->mnodeId);
321 322
      if (pDnode != NULL) {
        syncCfg.nodeInfo[index].nodePort = pDnode->dnodePort + TSDB_PORT_SYNC;
S
Shengliang Guan 已提交
323
        tstrncpy(syncCfg.nodeInfo[index].nodeFqdn, pDnode->dnodeFqdn, TSDB_FQDN_LEN);
324 325 326
        index++;
      }

327 328
      mnodeDecDnodeRef(pDnode);
      mnodeDecMnodeRef(pMnode);
S
slguan 已提交
329
    }
S
Shengliang Guan 已提交
330
    sdbFreeIter(pIter);
S
slguan 已提交
331 332 333
  }

  syncCfg.replica = index;
334
  syncCfg.quorum = (syncCfg.replica == 1) ? 1 : 2;
S
slguan 已提交
335 336 337 338 339 340 341 342 343 344 345 346

  bool hasThisDnode = false;
  for (int32_t i = 0; i < syncCfg.replica; ++i) {
    if (syncCfg.nodeInfo[i].nodeId == dnodeGetDnodeId()) {
      hasThisDnode = true;
      break;
    }
  }

  if (!hasThisDnode) return;
  if (memcmp(&syncCfg, &tsSdbObj.cfg, sizeof(SSyncCfg)) == 0) return;

347
  sdbInfo("work as mnode, replica:%d", syncCfg.replica);
S
slguan 已提交
348
  for (int32_t i = 0; i < syncCfg.replica; ++i) {
349
    sdbInfo("mnode:%d, %s:%d", syncCfg.nodeInfo[i].nodeId, syncCfg.nodeInfo[i].nodeFqdn, syncCfg.nodeInfo[i].nodePort);
S
slguan 已提交
350
  }
S
slguan 已提交
351

guanshengliang's avatar
guanshengliang 已提交
352
  SSyncInfo syncInfo = {0};
S
slguan 已提交
353 354 355
  syncInfo.vgId = 1;
  syncInfo.version = sdbGetVersion();
  syncInfo.syncCfg = syncCfg;
S
slguan 已提交
356
  sprintf(syncInfo.path, "%s", tsMnodeDir);
S
slguan 已提交
357 358 359
  syncInfo.ahandle = NULL;
  syncInfo.getWalInfo = sdbGetWalInfo;
  syncInfo.getFileInfo = sdbGetFileInfo;
360
  syncInfo.writeToCache = sdbWriteToQueue;
361
  syncInfo.confirmForward = sdbConfirmForward;
S
slguan 已提交
362 363
  syncInfo.notifyRole = sdbNotifyRole;
  tsSdbObj.cfg = syncCfg;
364

S
slguan 已提交
365 366 367 368 369
  if (tsSdbObj.sync) {
    syncReconfig(tsSdbObj.sync, &syncCfg);
  } else {
    tsSdbObj.sync = syncStart(&syncInfo);
  }
370
  sdbUpdateMnodeRoles();
S
slguan 已提交
371 372 373 374 375
}

int32_t sdbInit() {
  pthread_mutex_init(&tsSdbObj.mutex, NULL);

376 377 378 379
  if (sdbInitWriteWorker() != 0) {
    return -1;
  }

S
slguan 已提交
380 381 382
  if (sdbInitWal() != 0) {
    return -1;
  }
383

S
slguan 已提交
384
  sdbRestoreTables();
S
slguan 已提交
385

386
  if (mnodeGetMnodesNum() == 1) {
S
slguan 已提交
387 388 389 390 391 392
    tsSdbObj.role = TAOS_SYNC_ROLE_MASTER;
  }

  sdbUpdateSync();

  tsSdbObj.status = SDB_STATUS_SERVING;
S
slguan 已提交
393
  return TSDB_CODE_SUCCESS;
H
hzcheng 已提交
394 395
}

S
slguan 已提交
396
void sdbCleanUp() {
S
slguan 已提交
397 398
  if (tsSdbObj.status != SDB_STATUS_SERVING) return;

S
slguan 已提交
399
  tsSdbObj.status = SDB_STATUS_CLOSING;
guanshengliang's avatar
guanshengliang 已提交
400
  
401
  sdbCleanupWriteWorker();
S
log  
Shengliang Guan 已提交
402
  sdbDebug("sdb will be closed, ver:%" PRId64, tsSdbObj.version);
403

guanshengliang's avatar
guanshengliang 已提交
404 405 406 407 408 409 410 411 412 413
  if (tsSdbObj.sync) {
    syncStop(tsSdbObj.sync);
    tsSdbObj.sync = NULL;
  }

  if (tsSdbObj.wal) {
    walClose(tsSdbObj.wal);
    tsSdbObj.wal = NULL;
  }
  
S
slguan 已提交
414
  pthread_mutex_destroy(&tsSdbObj.mutex);
H
hzcheng 已提交
415 416
}

417
void sdbIncRef(void *handle, void *pObj) {
418
  if (pObj == NULL || handle == NULL) return;
419 420 421

  SSdbTable *pTable = handle;
  int32_t *  pRefCount = (int32_t *)(pObj + pTable->refCountPos);
S
Shengliang Guan 已提交
422 423
  int32_t    refCount = atomic_add_fetch_32(pRefCount, 1);
  sdbTrace("add ref to table:%s record:%p:%s:%d", pTable->tableName, pObj, sdbGetKeyStrFromObj(pTable, pObj), refCount);
S
slguan 已提交
424 425
}

426
void sdbDecRef(void *handle, void *pObj) {
427
  if (pObj == NULL || handle == NULL) return;
428 429 430 431

  SSdbTable *pTable = handle;
  int32_t *  pRefCount = (int32_t *)(pObj + pTable->refCountPos);
  int32_t    refCount = atomic_sub_fetch_32(pRefCount, 1);
S
Shengliang Guan 已提交
432
  sdbTrace("def ref of table:%s record:%p:%s:%d", pTable->tableName, pObj, sdbGetKeyStrFromObj(pTable, pObj), refCount);
S
slguan 已提交
433

S
Shengliang Guan 已提交
434
  int32_t *updateEnd = pObj + pTable->refCountPos - 4;
435
  if (refCount <= 0 && *updateEnd) {
S
Shengliang Guan 已提交
436
    sdbTrace("table:%s, record:%p:%s:%d is destroyed", pTable->tableName, pObj, sdbGetKeyStrFromObj(pTable, pObj), refCount);
437 438 439 440
    SSdbOper oper = {.pObj = pObj};
    (*pTable->destroyFp)(&oper);
  }
}
S
slguan 已提交
441

S
Shengliang Guan 已提交
442
static void *sdbGetRowMeta(SSdbTable *pTable, void *key) {
443
  if (pTable == NULL) return NULL;
S
slguan 已提交
444

445
  int32_t keySize = sizeof(int32_t);
446
  if (pTable->keyType == SDB_KEY_STRING || pTable->keyType == SDB_KEY_VAR_STRING) {
447 448
    keySize = strlen((char *)key);
  }
S
Shengliang Guan 已提交
449 450 451 452

  void **ppRow = (void **)taosHashGet(pTable->iHandle, key, keySize);
  if (ppRow == NULL) return NULL;
  return *ppRow;
453
}
S
slguan 已提交
454

S
Shengliang Guan 已提交
455
static void *sdbGetRowMetaFromObj(SSdbTable *pTable, void *key) {
456
  return sdbGetRowMeta(pTable, sdbGetObjKey(pTable, key));
S
slguan 已提交
457 458
}

H
hzcheng 已提交
459
void *sdbGetRow(void *handle, void *key) {
S
Shengliang Guan 已提交
460 461 462 463
  void *pRow = sdbGetRowMeta(handle, key);
  if (pRow) {
    sdbIncRef(handle, pRow);
    return pRow;
464 465 466
  } else {
    return NULL;
  }
H
hzcheng 已提交
467 468
}

469 470 471 472
static void *sdbGetRowFromObj(SSdbTable *pTable, void *key) {
  return sdbGetRow(pTable, sdbGetObjKey(pTable, key));
}

S
slguan 已提交
473
static int32_t sdbInsertHash(SSdbTable *pTable, SSdbOper *pOper) {
474
  void *  key = sdbGetObjKey(pTable, pOper->pObj);
475
  int32_t keySize = sizeof(int32_t);
476 477 478

  if (pTable->keyType == SDB_KEY_STRING || pTable->keyType == SDB_KEY_VAR_STRING) {
    keySize = strlen((char *)key);
479
  }
480

481
  taosHashPut(pTable->iHandle, key, keySize, &pOper->pObj, sizeof(int64_t));
482

S
slguan 已提交
483
  sdbIncRef(pTable, pOper->pObj);
484
  atomic_add_fetch_32(&pTable->numOfRows, 1);
S
slguan 已提交
485

S
Shengliang Guan 已提交
486 487 488
  if (pTable->keyType == SDB_KEY_AUTO) {
    pTable->autoIndex = MAX(pTable->autoIndex, *((uint32_t *)pOper->pObj));
  } else {
489
    atomic_add_fetch_32(&pTable->autoIndex, 1);
S
slguan 已提交
490 491
  }

S
Shengliang Guan 已提交
492 493
  sdbDebug("table:%s, insert record:%s to hash, rowSize:%d numOfRows:%" PRId64 ", msg:%p", pTable->tableName,
           sdbGetKeyStrFromObj(pTable, pOper->pObj), pOper->rowSize, pTable->numOfRows, pOper->pMsg);
S
slguan 已提交
494

S
Shengliang Guan 已提交
495 496 497 498 499 500 501
  int32_t code = (*pTable->insertFp)(pOper);
  if (code != TSDB_CODE_SUCCESS) {
    sdbError("table:%s, failed to insert record:%s to hash, remove it", pTable->tableName,
             sdbGetKeyStrFromObj(pTable, pOper->pObj));
    sdbDeleteHash(pTable, pOper);
  }

S
slguan 已提交
502 503
  return TSDB_CODE_SUCCESS;
}
H
hzcheng 已提交
504

S
slguan 已提交
505
static int32_t sdbDeleteHash(SSdbTable *pTable, SSdbOper *pOper) {
S
Shengliang Guan 已提交
506 507 508 509 510 511 512 513
  int32_t *updateEnd = pOper->pObj + pTable->refCountPos - 4;
  bool set = atomic_val_compare_exchange_32(updateEnd, 0, 1) == 0;
  if (!set) {
    sdbError("table:%s, failed to delete record:%s from hash, for it already removed", pTable->tableName,
             sdbGetKeyStrFromObj(pTable, pOper->pObj));
    return TSDB_CODE_MND_SDB_OBJ_NOT_THERE;
  }

S
slguan 已提交
514 515
  (*pTable->deleteFp)(pOper);
  
516
  void *  key = sdbGetObjKey(pTable, pOper->pObj);
517
  int32_t keySize = sizeof(int32_t);
518 519
  if (pTable->keyType == SDB_KEY_STRING || pTable->keyType == SDB_KEY_VAR_STRING) {
    keySize = strlen((char *)key);
520
  }
521 522

  taosHashRemove(pTable->iHandle, key, keySize);
523 524
  atomic_sub_fetch_32(&pTable->numOfRows, 1);
  
S
Shengliang Guan 已提交
525 526
  sdbDebug("table:%s, delete record:%s from hash, numOfRows:%" PRId64 ", msg:%p", pTable->tableName,
           sdbGetKeyStrFromObj(pTable, pOper->pObj), pTable->numOfRows, pOper->pMsg);
S
slguan 已提交
527 528

  sdbDecRef(pTable, pOper->pObj);
S
slguan 已提交
529

S
slguan 已提交
530 531 532
  return TSDB_CODE_SUCCESS;
}

S
slguan 已提交
533
static int32_t sdbUpdateHash(SSdbTable *pTable, SSdbOper *pOper) {
S
Shengliang Guan 已提交
534 535
  sdbDebug("table:%s, update record:%s in hash, numOfRows:%" PRId64 ", msg:%p", pTable->tableName,
           sdbGetKeyStrFromObj(pTable, pOper->pObj), pTable->numOfRows, pOper->pMsg);
S
slguan 已提交
536 537 538 539 540

  (*pTable->updateFp)(pOper);
  return TSDB_CODE_SUCCESS;
}

S
slguan 已提交
541
static int sdbWrite(void *param, void *data, int type) {
542
  SSdbOper *pOper = param;
S
slguan 已提交
543
  SWalHead *pHead = data;
544 545
  int32_t tableId = pHead->msgType / 10;
  int32_t action = pHead->msgType % 10;
S
slguan 已提交
546

S
slguan 已提交
547 548
  SSdbTable *pTable = sdbGetTableFromId(tableId);
  assert(pTable != NULL);
S
slguan 已提交
549

S
slguan 已提交
550
  pthread_mutex_lock(&tsSdbObj.mutex);
551
  
S
slguan 已提交
552
  if (pHead->version == 0) {
553
    // assign version
S
slguan 已提交
554 555 556 557 558 559
    tsSdbObj.version++;
    pHead->version = tsSdbObj.version;
  } else {
    // for data from WAL or forward, version may be smaller
    if (pHead->version <= tsSdbObj.version) {
      pthread_mutex_unlock(&tsSdbObj.mutex);
S
log  
Shengliang Guan 已提交
560
      sdbDebug("table:%s, failed to restore %s record:%s from source(%d), ver:%" PRId64 " too large, sdb ver:%" PRId64,
561
               pTable->tableName, sdbGetActionStr(action), sdbGetKeyStr(pTable, pHead->cont), type, pHead->version, tsSdbObj.version);
S
slguan 已提交
562 563 564
      return TSDB_CODE_SUCCESS;
    } else if (pHead->version != tsSdbObj.version + 1) {
      pthread_mutex_unlock(&tsSdbObj.mutex);
S
log  
Shengliang Guan 已提交
565
      sdbError("table:%s, failed to restore %s record:%s from source(%d), ver:%" PRId64 " too large, sdb ver:%" PRId64,
566
               pTable->tableName, sdbGetActionStr(action), sdbGetKeyStr(pTable, pHead->cont), type, pHead->version, tsSdbObj.version);
567
      return TSDB_CODE_MND_APP_ERROR;
S
slguan 已提交
568 569 570
    } else {
      tsSdbObj.version = pHead->version;
    }
S
slguan 已提交
571
  }
S
slguan 已提交
572

S
slguan 已提交
573
  int32_t code = walWrite(tsSdbObj.wal, pHead);
S
slguan 已提交
574
  if (code < 0) {
S
slguan 已提交
575 576
    pthread_mutex_unlock(&tsSdbObj.mutex);
    return code;
S
slguan 已提交
577 578
  }

579
  pthread_mutex_unlock(&tsSdbObj.mutex);
580

S
slguan 已提交
581
  // from app, oper is created
582
  if (pOper != NULL) {
583
    // forward to peers
S
Shengliang Guan 已提交
584
    pOper->processedCount = 0;
585
    int32_t syncCode = syncForwardToPeer(tsSdbObj.sync, pHead, pOper, TAOS_QTYPE_RPC);
S
Shengliang Guan 已提交
586
    if (syncCode <= 0) pOper->processedCount = 1;
587 588

    if (syncCode < 0) {
S
log  
Shengliang Guan 已提交
589 590
      sdbError("table:%s, failed to forward request, result:%s action:%s record:%s ver:%" PRId64 ", msg:%p", pTable->tableName,
               tstrerror(syncCode), sdbGetActionStr(action), sdbGetKeyStr(pTable, pHead->cont), pHead->version, pOper->pMsg);
591
    } else if (syncCode > 0) {
S
log  
Shengliang Guan 已提交
592 593
      sdbDebug("table:%s, forward request is sent, action:%s record:%s ver:%" PRId64 ", msg:%p", pTable->tableName,
               sdbGetActionStr(action), sdbGetKeyStr(pTable, pHead->cont), pHead->version, pOper->pMsg);
594
    } else {
S
log  
Shengliang Guan 已提交
595 596
      sdbTrace("table:%s, no need to send fwd request, action:%s record:%s ver:%" PRId64 ", msg:%p", pTable->tableName,
               sdbGetActionStr(action), sdbGetKeyStr(pTable, pHead->cont), pHead->version, pOper->pMsg);
597
    }
598
    return syncCode;
S
slguan 已提交
599
  }
S
slguan 已提交
600

S
log  
Shengliang Guan 已提交
601
  sdbDebug("table:%s, record from wal/fwd is disposed, action:%s record:%s ver:%" PRId64, pTable->tableName,
602 603 604 605 606
           sdbGetActionStr(action), sdbGetKeyStr(pTable, pHead->cont), pHead->version);

  // even it is WAL/FWD, it shall be called to update version in sync
  syncForwardToPeer(tsSdbObj.sync, pHead, pOper, TAOS_QTYPE_RPC);

607
  // from wal or forward msg, oper not created, should add into hash
S
slguan 已提交
608
  if (action == SDB_ACTION_INSERT) {
S
slguan 已提交
609
    SSdbOper oper = {.rowSize = pHead->len, .rowData = pHead->cont, .table = pTable};
S
slguan 已提交
610
    code = (*pTable->decodeFp)(&oper);
S
slguan 已提交
611
    return sdbInsertHash(pTable, &oper);
S
slguan 已提交
612
  } else if (action == SDB_ACTION_DELETE) {
S
Shengliang Guan 已提交
613 614
    void *pRow = sdbGetRowMeta(pTable, pHead->cont);
    if (pRow == NULL) {
615 616 617 618
      sdbError("table:%s, failed to get object:%s from wal while dispose delete action", pTable->tableName,
               pHead->cont);
      return TSDB_CODE_SUCCESS;
    }
S
Shengliang Guan 已提交
619
    SSdbOper oper = {.table = pTable, .pObj = pRow};
S
slguan 已提交
620
    return sdbDeleteHash(pTable, &oper);
S
slguan 已提交
621
  } else if (action == SDB_ACTION_UPDATE) {
S
Shengliang Guan 已提交
622 623
    void *pRow = sdbGetRowMeta(pTable, pHead->cont);
    if (pRow == NULL) {
624 625 626 627
      sdbError("table:%s, failed to get object:%s from wal while dispose update action", pTable->tableName,
               pHead->cont);
      return TSDB_CODE_SUCCESS;
    }
S
slguan 已提交
628
    SSdbOper oper = {.rowSize = pHead->len, .rowData = pHead->cont, .table = pTable};
S
slguan 已提交
629
    code = (*pTable->decodeFp)(&oper);
S
slguan 已提交
630
    return sdbUpdateHash(pTable, &oper);
631 632 633
  } else {
    return TSDB_CODE_MND_INVALID_MSG_TYPE;
  }
S
slguan 已提交
634 635
}

S
slguan 已提交
636
int32_t sdbInsertRow(SSdbOper *pOper) {
S
slguan 已提交
637
  SSdbTable *pTable = (SSdbTable *)pOper->table;
638
  if (pTable == NULL) return TSDB_CODE_MND_SDB_INVALID_TABLE_TYPE;
S
slguan 已提交
639

640
  if (sdbGetRowFromObj(pTable, pOper->pObj)) {
S
Shengliang Guan 已提交
641 642
    sdbError("table:%s, failed to insert record:%s, already exist", pTable->tableName,
             sdbGetKeyStrFromObj(pTable, pOper->pObj));
S
slguan 已提交
643
    sdbDecRef(pTable, pOper->pObj);
644
    return TSDB_CODE_MND_SDB_OBJ_ALREADY_THERE;
S
slguan 已提交
645
  }
S
slguan 已提交
646

S
slguan 已提交
647
  if (pTable->keyType == SDB_KEY_AUTO) {
648
    *((uint32_t *)pOper->pObj) = atomic_add_fetch_32(&pTable->autoIndex, 1);
S
slguan 已提交
649 650 651

    // let vgId increase from 2
    if (pTable->autoIndex == 1 && strcmp(pTable->tableName, "vgroups") == 0) {
652
      *((uint32_t *)pOper->pObj) = atomic_add_fetch_32(&pTable->autoIndex, 1);
S
slguan 已提交
653
    }
S
slguan 已提交
654
  }
H
hzcheng 已提交
655

656 657 658 659 660
  int32_t code = sdbInsertHash(pTable, pOper);
  if (code != TSDB_CODE_SUCCESS) {
    sdbError("table:%s, failed to insert into hash", pTable->tableName);
    return code;
  }
S
slguan 已提交
661

662 663 664
  // just insert data into memory
  if (pOper->type != SDB_OPER_GLOBAL) {
    return TSDB_CODE_SUCCESS;
S
slguan 已提交
665 666
  }

S
Shengliang Guan 已提交
667 668 669 670 671 672 673 674 675 676 677
  if (pOper->reqFp) {
    return (*pOper->reqFp)(pOper->pMsg);
  } else {
    return sdbInsertRowImp(pOper);
  }
}

int32_t sdbInsertRowImp(SSdbOper *pOper) {
  SSdbTable *pTable = (SSdbTable *)pOper->table;
  if (pTable == NULL) return TSDB_CODE_MND_SDB_INVALID_TABLE_TYPE;

678
  int32_t size = sizeof(SSdbOper) + sizeof(SWalHead) + pTable->maxRowSize + SDB_SYNC_HACK;
679
  SSdbOper *pNewOper = taosAllocateQitem(size);
S
Shengliang Guan 已提交
680

681
  SWalHead *pHead = (void *)pNewOper + sizeof(SSdbOper) + SDB_SYNC_HACK;
682 683 684 685 686 687 688 689 690
  pHead->version = 0;
  pHead->len = pOper->rowSize;
  pHead->msgType = pTable->tableId * 10 + SDB_ACTION_INSERT;

  pOper->rowData = pHead->cont;
  (*pTable->encodeFp)(pOper);
  pHead->len = pOper->rowSize;

  memcpy(pNewOper, pOper, sizeof(SSdbOper));
691 692

  if (pNewOper->pMsg != NULL) {
S
Shengliang Guan 已提交
693
    sdbDebug("app:%p:%p, table:%s record:%p:%s, insert action is add to sdb queue", pNewOper->pMsg->rpcMsg.ahandle,
694
             pNewOper->pMsg, pTable->tableName, pOper->pObj, sdbGetKeyStrFromObj(pTable, pOper->pObj));
695 696
  }

697
  sdbIncRef(pNewOper->table, pNewOper->pObj);
698
  taosWriteQitem(tsSdbWriteQueue, TAOS_QTYPE_RPC, pNewOper);
S
Shengliang Guan 已提交
699 700

  return TSDB_CODE_MND_ACTION_IN_PROGRESS;
H
hzcheng 已提交
701 702
}

703 704 705 706
bool sdbCheckRowDeleted(void *pTableInput, void *pRow) {
  SSdbTable *pTable = pTableInput;
  if (pTable == NULL) return false;

S
Shengliang Guan 已提交
707
  int32_t *updateEnd = pRow + pTable->refCountPos - 4;
S
Shengliang Guan 已提交
708
  return atomic_val_compare_exchange_32(updateEnd, 1, 1) == 1;
709 710
}

S
slguan 已提交
711
int32_t sdbDeleteRow(SSdbOper *pOper) {
S
slguan 已提交
712
  SSdbTable *pTable = (SSdbTable *)pOper->table;
713
  if (pTable == NULL) return TSDB_CODE_MND_SDB_INVALID_TABLE_TYPE;
H
hzcheng 已提交
714

S
Shengliang Guan 已提交
715 716
  void *pRow = sdbGetRowMetaFromObj(pTable, pOper->pObj);
  if (pRow == NULL) {
717
    sdbDebug("table:%s, record is not there, delete failed", pTable->tableName);
718
    return TSDB_CODE_MND_SDB_OBJ_NOT_THERE;
H
hzcheng 已提交
719 720
  }

721 722
  sdbIncRef(pTable, pOper->pObj);

723 724 725
  int32_t code = sdbDeleteHash(pTable, pOper);
  if (code != TSDB_CODE_SUCCESS) {
    sdbError("table:%s, failed to delete from hash", pTable->tableName);
726
    sdbDecRef(pTable, pOper->pObj);
727 728
    return code;
  }
H
hzcheng 已提交
729

730 731
  // just delete data from memory
  if (pOper->type != SDB_OPER_GLOBAL) {
732
    sdbDecRef(pTable, pOper->pObj);
733
    return TSDB_CODE_SUCCESS;
S
slguan 已提交
734 735
  }

S
Shengliang Guan 已提交
736 737 738 739 740 741 742 743 744 745 746
  if (pOper->reqFp) {
    return (*pOper->reqFp)(pOper->pMsg);
  } else {
    return sdbDeleteRowImp(pOper);
  }
}

int32_t sdbDeleteRowImp(SSdbOper *pOper) {
  SSdbTable *pTable = (SSdbTable *)pOper->table;
  if (pTable == NULL) return TSDB_CODE_MND_SDB_INVALID_TABLE_TYPE;

747
  int32_t size = sizeof(SSdbOper) + sizeof(SWalHead) + pTable->maxRowSize + SDB_SYNC_HACK;
748 749
  SSdbOper *pNewOper = taosAllocateQitem(size);

750
  SWalHead *pHead = (void *)pNewOper + sizeof(SSdbOper) + SDB_SYNC_HACK;
751 752
  pHead->version = 0;
  pHead->msgType = pTable->tableId * 10 + SDB_ACTION_DELETE;
S
Shengliang Guan 已提交
753

754 755 756
  pOper->rowData = pHead->cont;
  (*pTable->encodeFp)(pOper);
  pHead->len = pOper->rowSize;
757 758

  memcpy(pNewOper, pOper, sizeof(SSdbOper));
759 760

  if (pNewOper->pMsg != NULL) {
S
Shengliang Guan 已提交
761
    sdbDebug("app:%p:%p, table:%s record:%p:%s, delete action is add to sdb queue", pNewOper->pMsg->rpcMsg.ahandle,
762
             pNewOper->pMsg, pTable->tableName, pOper->pObj, sdbGetKeyStrFromObj(pTable, pOper->pObj));
763 764
  }

765
  taosWriteQitem(tsSdbWriteQueue, TAOS_QTYPE_RPC, pNewOper);
S
Shengliang Guan 已提交
766 767

  return TSDB_CODE_MND_ACTION_IN_PROGRESS;
H
hzcheng 已提交
768 769
}

S
slguan 已提交
770
int32_t sdbUpdateRow(SSdbOper *pOper) {
S
slguan 已提交
771
  SSdbTable *pTable = (SSdbTable *)pOper->table;
772
  if (pTable == NULL) return TSDB_CODE_MND_SDB_INVALID_TABLE_TYPE;
H
hzcheng 已提交
773

S
Shengliang Guan 已提交
774 775
  void *pRow = sdbGetRowMetaFromObj(pTable, pOper->pObj);
  if (pRow == NULL) {
776
    sdbDebug("table:%s, record is not there, update failed", pTable->tableName);
777 778 779 780 781 782 783 784 785 786 787 788
    return TSDB_CODE_MND_SDB_OBJ_NOT_THERE;
  }

  int32_t code = sdbUpdateHash(pTable, pOper);
  if (code != TSDB_CODE_SUCCESS) {
    sdbError("table:%s, failed to update hash", pTable->tableName);
    return code;
  }

  // just update data in memory
  if (pOper->type != SDB_OPER_GLOBAL) {
    return TSDB_CODE_SUCCESS;
H
hzcheng 已提交
789 790
  }

S
Shengliang Guan 已提交
791 792 793 794 795 796 797 798 799 800 801
  if (pOper->reqFp) {
    return (*pOper->reqFp)(pOper->pMsg);
  } else {
    return sdbUpdateRowImp(pOper);
  }
}

int32_t sdbUpdateRowImp(SSdbOper *pOper) {
  SSdbTable *pTable = (SSdbTable *)pOper->table;
  if (pTable == NULL) return TSDB_CODE_MND_SDB_INVALID_TABLE_TYPE;

802
  int32_t size = sizeof(SSdbOper) + sizeof(SWalHead) + pTable->maxRowSize + SDB_SYNC_HACK;
803
  SSdbOper *pNewOper = taosAllocateQitem(size);
H
hzcheng 已提交
804

805
  SWalHead *pHead = (void *)pNewOper + sizeof(SSdbOper) + SDB_SYNC_HACK;
806 807
  pHead->version = 0;
  pHead->msgType = pTable->tableId * 10 + SDB_ACTION_UPDATE;
H
hzcheng 已提交
808

809 810 811
  pOper->rowData = pHead->cont;
  (*pTable->encodeFp)(pOper);
  pHead->len = pOper->rowSize;
S
slguan 已提交
812

813 814 815
  memcpy(pNewOper, pOper, sizeof(SSdbOper));

  if (pNewOper->pMsg != NULL) {
S
Shengliang Guan 已提交
816
    sdbDebug("app:%p:%p, table:%s record:%p:%s, update action is add to sdb queue", pNewOper->pMsg->rpcMsg.ahandle,
817
             pNewOper->pMsg, pTable->tableName, pOper->pObj, sdbGetKeyStrFromObj(pTable, pOper->pObj));
818 819
  }

820
  sdbIncRef(pNewOper->table, pNewOper->pObj);
821
  taosWriteQitem(tsSdbWriteQueue, TAOS_QTYPE_RPC, pNewOper);
S
Shengliang Guan 已提交
822 823

  return TSDB_CODE_MND_ACTION_IN_PROGRESS;
S
slguan 已提交
824
}
S
slguan 已提交
825

S
slguan 已提交
826 827 828 829
void *sdbFetchRow(void *handle, void *pNode, void **ppRow) {
  SSdbTable *pTable = (SSdbTable *)handle;
  *ppRow = NULL;
  if (pTable == NULL) return NULL;
H
hzcheng 已提交
830

831 832 833 834 835 836 837 838 839 840
  SHashMutableIterator *pIter = pNode;
  if (pIter == NULL) {
    pIter = taosHashCreateIter(pTable->iHandle);
  }

  if (!taosHashIterNext(pIter)) {
    taosHashDestroyIter(pIter);
    return NULL;
  }

S
Shengliang Guan 已提交
841 842
  void **ppMetaRow = taosHashIterGet(pIter);
  if (ppMetaRow == NULL) {
S
Shengliang Guan 已提交
843 844 845
    taosHashDestroyIter(pIter);
    return NULL;
  }
S
slguan 已提交
846

S
Shengliang Guan 已提交
847 848
  *ppRow = *ppMetaRow;
  sdbIncRef(handle, *ppMetaRow);
H
hzcheng 已提交
849

850
  return pIter;
S
slguan 已提交
851 852
}

S
Shengliang Guan 已提交
853 854 855 856 857 858
void sdbFreeIter(void *pIter) {
  if (pIter != NULL) {
    taosHashDestroyIter(pIter);
  }
}

S
slguan 已提交
859 860
void *sdbOpenTable(SSdbTableDesc *pDesc) {
  SSdbTable *pTable = (SSdbTable *)calloc(1, sizeof(SSdbTable));
S
slguan 已提交
861
  
S
slguan 已提交
862 863
  if (pTable == NULL) return NULL;

864
  tstrncpy(pTable->tableName, pDesc->tableName, SDB_TABLE_LEN);
S
slguan 已提交
865 866 867 868 869 870 871 872 873 874 875
  pTable->keyType      = pDesc->keyType;
  pTable->tableId      = pDesc->tableId;
  pTable->hashSessions = pDesc->hashSessions;
  pTable->maxRowSize   = pDesc->maxRowSize;
  pTable->refCountPos  = pDesc->refCountPos;
  pTable->insertFp     = pDesc->insertFp;
  pTable->deleteFp     = pDesc->deleteFp;
  pTable->updateFp     = pDesc->updateFp;
  pTable->encodeFp     = pDesc->encodeFp;
  pTable->decodeFp     = pDesc->decodeFp;
  pTable->destroyFp    = pDesc->destroyFp;
S
slguan 已提交
876
  pTable->restoredFp   = pDesc->restoredFp;
877 878

  _hash_fn_t hashFp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT);
S
Shengliang Guan 已提交
879
  if (pTable->keyType == SDB_KEY_STRING || pTable->keyType == SDB_KEY_VAR_STRING) {
880
    hashFp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY);
S
slguan 已提交
881
  }
H
Haojun Liao 已提交
882
  pTable->iHandle = taosHashInit(pTable->hashSessions, hashFp, true, true);
S
slguan 已提交
883

S
slguan 已提交
884 885
  tsSdbObj.numOfTables++;
  tsSdbObj.tableList[pTable->tableId] = pTable;
S
slguan 已提交
886
  return pTable;
H
hzcheng 已提交
887 888 889 890 891
}

void sdbCloseTable(void *handle) {
  SSdbTable *pTable = (SSdbTable *)handle;
  if (pTable == NULL) return;
S
slguan 已提交
892
  
S
slguan 已提交
893 894
  tsSdbObj.numOfTables--;
  tsSdbObj.tableList[pTable->tableId] = NULL;
H
hzcheng 已提交
895

896 897
  SHashMutableIterator *pIter = taosHashCreateIter(pTable->iHandle);
  while (taosHashIterNext(pIter)) {
S
Shengliang Guan 已提交
898 899
    void **ppRow = taosHashIterGet(pIter);
    if (ppRow == NULL) continue;
S
slguan 已提交
900

S
slguan 已提交
901
    SSdbOper oper = {
S
Shengliang Guan 已提交
902
      .pObj = *ppRow,
S
slguan 已提交
903 904
      .table = pTable,
    };
905

S
slguan 已提交
906
    (*pTable->destroyFp)(&oper);
H
hzcheng 已提交
907 908
  }

909 910
  taosHashDestroyIter(pIter);
  taosHashCleanup(pTable->iHandle);
H
hzcheng 已提交
911

912
  sdbDebug("table:%s, is closed, numOfTables:%d", pTable->tableName, tsSdbObj.numOfTables);
S
slguan 已提交
913
  free(pTable);
H
hzcheng 已提交
914
}
S
slguan 已提交
915

916 917 918 919 920 921 922 923 924 925 926 927
int32_t sdbInitWriteWorker() {
  tsSdbPool.num = 1;
  tsSdbPool.writeWorker = (SSdbWriteWorker *)calloc(sizeof(SSdbWriteWorker), tsSdbPool.num);

  if (tsSdbPool.writeWorker == NULL) return -1;
  for (int32_t i = 0; i < tsSdbPool.num; ++i) {
    SSdbWriteWorker *pWorker = tsSdbPool.writeWorker + i;
    pWorker->workerId = i;
  }

  sdbAllocWriteQueue();
  
928
  mInfo("sdb write is opened");
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
  return 0;
}

void sdbCleanupWriteWorker() {
  for (int32_t i = 0; i < tsSdbPool.num; ++i) {
    SSdbWriteWorker *pWorker = tsSdbPool.writeWorker + i;
    if (pWorker->thread) {
      taosQsetThreadResume(tsSdbWriteQset);
    }
  }

  for (int32_t i = 0; i < tsSdbPool.num; ++i) {
    SSdbWriteWorker *pWorker = tsSdbPool.writeWorker + i;
    if (pWorker->thread) {
      pthread_join(pWorker->thread, NULL);
    }
  }

  sdbFreeWritequeue();
S
Shengliang Guan 已提交
948
  taosTFree(tsSdbPool.writeWorker);
949

950
  mInfo("sdb write is closed");
951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
}

int32_t sdbAllocWriteQueue() {
  tsSdbWriteQueue = taosOpenQueue();
  if (tsSdbWriteQueue == NULL) return TSDB_CODE_MND_OUT_OF_MEMORY;

  tsSdbWriteQset = taosOpenQset();
  if (tsSdbWriteQset == NULL) {
    taosCloseQueue(tsSdbWriteQueue);
    return TSDB_CODE_MND_OUT_OF_MEMORY;
  }
  taosAddIntoQset(tsSdbWriteQset, tsSdbWriteQueue, NULL);

  tsSdbWriteQall = taosAllocateQall();
  if (tsSdbWriteQall == NULL) {
    taosCloseQset(tsSdbWriteQset);
    taosCloseQueue(tsSdbWriteQueue);
    return TSDB_CODE_MND_OUT_OF_MEMORY;
  }
  
  for (int32_t i = 0; i < tsSdbPool.num; ++i) {
    SSdbWriteWorker *pWorker = tsSdbPool.writeWorker + i;
    pWorker->workerId = i;

    pthread_attr_t thAttr;
    pthread_attr_init(&thAttr);
    pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE);

    if (pthread_create(&pWorker->thread, &thAttr, sdbWorkerFp, pWorker) != 0) {
      mError("failed to create thread to process sdb write queue, reason:%s", strerror(errno));
      taosFreeQall(tsSdbWriteQall);
      taosCloseQset(tsSdbWriteQset);
      taosCloseQueue(tsSdbWriteQueue);
      return TSDB_CODE_MND_OUT_OF_MEMORY;
    }

    pthread_attr_destroy(&thAttr);
988
    mDebug("sdb write worker:%d is launched, total:%d", pWorker->workerId, tsSdbPool.num);
989 990
  }

991
  mDebug("sdb write queue:%p is allocated", tsSdbWriteQueue);
992 993 994 995
  return TSDB_CODE_SUCCESS;
}

void sdbFreeWritequeue() {
996
  taosCloseQueue(tsSdbWriteQueue);
997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024
  taosFreeQall(tsSdbWriteQall);
  taosCloseQset(tsSdbWriteQset);
  tsSdbWriteQall = NULL;
  tsSdbWriteQset = NULL;
  tsSdbWriteQueue = NULL;
}

int sdbWriteToQueue(void *param, void *data, int type) {
  SWalHead *pHead = data;
  int size = sizeof(SWalHead) + pHead->len;
  SWalHead *pWal = (SWalHead *)taosAllocateQitem(size);
  memcpy(pWal, pHead, size);

  taosWriteQitem(tsSdbWriteQueue, type, pWal);
  return 0;
}

static void *sdbWorkerFp(void *param) {
  SWalHead *pHead;
  SSdbOper *pOper;
  int32_t   type;
  int32_t   numOfMsgs;
  void *    item;
  void *    unUsed;

  while (1) {
    numOfMsgs = taosReadAllQitemsFromQset(tsSdbWriteQset, tsSdbWriteQall, &unUsed);
    if (numOfMsgs == 0) {
1025
      sdbDebug("sdbWorkerFp: got no message from qset, exiting...");
1026 1027 1028 1029 1030 1031 1032
      break;
    }

    for (int32_t i = 0; i < numOfMsgs; ++i) {
      taosGetQitem(tsSdbWriteQall, &type, &item);
      if (type == TAOS_QTYPE_RPC) {
        pOper = (SSdbOper *)item;
S
Shengliang Guan 已提交
1033
        pOper->processedCount = 1;
1034
        pHead = (void *)pOper + sizeof(SSdbOper) + SDB_SYNC_HACK;
1035
        if (pOper->pMsg != NULL) {
S
log  
Shengliang Guan 已提交
1036
          sdbDebug("app:%p:%p, table:%s record:%p:%s ver:%" PRIu64 ", will be processed in sdb queue",
1037 1038 1039
                   pOper->pMsg->rpcMsg.ahandle, pOper->pMsg, ((SSdbTable *)pOper->table)->tableName, pOper->pObj,
                   sdbGetKeyStr(pOper->table, pHead->cont), pHead->version);
        }
1040 1041 1042 1043 1044 1045
      } else {
        pHead = (SWalHead *)item;
        pOper = NULL;
      }

      int32_t code = sdbWrite(pOper, pHead, type);
1046
      if (code > 0) code = 0;
1047
      if (pOper) {
1048
        pOper->retCode = code;
1049
      } else {
1050
        pHead->len = code;  // hackway
1051
      }
1052 1053 1054 1055 1056 1057 1058 1059
    }

    walFsync(tsSdbObj.wal);

    // browse all items, and process them one by one
    taosResetQitems(tsSdbWriteQall);
    for (int32_t i = 0; i < numOfMsgs; ++i) {
      taosGetQitem(tsSdbWriteQall, &type, &item);
1060

1061 1062
      if (type == TAOS_QTYPE_RPC) {
        pOper = (SSdbOper *)item;
S
Shengliang Guan 已提交
1063
        sdbConfirmForward(NULL, pOper, pOper->retCode);
1064
      } else if (type == TAOS_QTYPE_FWD) {
S
Shengliang Guan 已提交
1065
        pHead = (SWalHead *)item;
1066
        syncConfirmForward(tsSdbObj.sync, pHead->version, pHead->len);
S
Shengliang Guan 已提交
1067
        taosFreeQitem(item);
1068
      } else {
S
Shengliang Guan 已提交
1069
        taosFreeQitem(item);
1070 1071 1072 1073 1074 1075
      }
    }
  }

  return NULL;
}