/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
#include "os.h"
#include "taoserror.h"
#include "trpc.h"
#include "tutil.h"
#include "tbalance.h"
#include "tqueue.h"
#include "twal.h"
#include "tsync.h"
#include "tglobal.h"
#include "hashint.h"
#include "hashstr.h"
#include "dnode.h"
#include "mgmtDef.h"
#include "mgmtLog.h"
#include "mgmtMnode.h"
#include "mgmtSdb.h"

/*
 * Kind of change carried by an sdb log record.
 * The row functions encode it into the WAL head as
 * `msgType = tableId * 10 + action`, and sdbWrite() recovers it with
 * `msgType % 10`.
 */
typedef enum {
  SDB_ACTION_INSERT,
  SDB_ACTION_DELETE,
  SDB_ACTION_UPDATE
} ESdbAction;

/*
 * Life-cycle state kept in tsSdbObj.status.
 * SDB_STATUS_OFFLINE is the first enumerator (value 0), i.e. the state of the
 * zero-initialized / memset tsSdbObj. sdbInit() switches to
 * SDB_STATUS_SERVING on success and sdbCleanUp() only runs in that state.
 * NOTE(review): SDB_ACTION_CLOSING is not referenced in this file and its
 * name does not follow the SDB_STATUS_ prefix of its siblings — confirm
 * whether it should be SDB_STATUS_CLOSING.
 */
typedef enum {
  SDB_STATUS_OFFLINE,
  SDB_STATUS_SERVING,
  SDB_ACTION_CLOSING
} ESdbStatus;

/*
 * One in-memory sdb table: an index of rows plus the callbacks supplied by
 * the table owner through SSdbTableDesc (see sdbOpenTable()).
 */
typedef struct _SSdbTable {
  // name copied from the descriptor; used in log messages
  char      tableName[TSDB_DB_NAME_LEN + 1];
  // slot in tsSdbObj.tableList and the `msgType / 10` part of a WAL record
  ESdbTable tableId;
  // selects the entry used in the sdb*IndexFp dispatch tables
  ESdbKey   keyType;
  // copied from the descriptor; not otherwise used in this file
  int32_t   hashSessions;
  // sizes the WAL buffer for insert/update and is passed to the index constructor
  int32_t   maxRowSize;
  // byte offset of the int32_t reference counter inside a row object; the
  // byte just before it is the flag that sdbDeleteHash() sets and
  // sdbDecRef() checks before destroying a row
  int32_t   refCountPos;
  // SDB_KEY_AUTO tables: highest key seen / last key handed out;
  // other tables: incremented on every insert into the index
  int32_t   autoIndex;
  // number of rows currently in the index
  int64_t   numOfRows;
  // opaque index handle returned by sdbInitIndexFp[keyType]
  void *    iHandle;
  // owner callbacks, invoked after/before the index is changed
  int32_t (*insertFp)(SSdbOper *pDesc);
  int32_t (*deleteFp)(SSdbOper *pOper);
  int32_t (*updateFp)(SSdbOper *pOper);
  // decodeFp builds a row object from WAL content, encodeFp does the reverse
  int32_t (*decodeFp)(SSdbOper *pOper);
  int32_t (*encodeFp)(SSdbOper *pOper);
  // releases a row object (called from sdbDecRef() and sdbCloseTable())
  int32_t (*destroyFp)(SSdbOper *pOper);
  // optional; called once per table from sdbRestoreTables()
  int32_t (*restoredFp)();
  // taken around index lookups/changes and the autoIndex/numOfRows updates
  pthread_mutex_t mutex;
} SSdbTable;

/*
 * Process-wide sdb state (single instance: tsSdbObj).
 */
typedef struct {
  // set by sdbNotifyRole(), or forced to master in sdbInit() when there is one mnode
  ESyncRole  role;
  ESdbStatus status;
  // version of the last record accepted by sdbWrite()
  int64_t    version;
  // handle returned by syncStart(); NULL while sync has not been started
  void *     sync;
  // handle returned by walOpen()
  void *     wal;
  // last sync configuration applied by sdbUpdateSync()
  SSyncCfg   cfg;
  // posted by sdbConfirmForward(), waited on by sdbForwardToPeer()
  sem_t      sem;
  // result code recorded by sdbConfirmForward()
  int32_t    code;
  int32_t    numOfTables;
  // open tables indexed by tableId
  SSdbTable *tableList[SDB_TABLE_MAX];
  // held by sdbWrite() while it assigns the version, writes the WAL and forwards
  pthread_mutex_t mutex;
} SSdbObject;

/*
 * Value stored in a table index for each key: the row object and the size
 * recorded for it at insert time (see sdbInsertHash()).
 */
typedef struct {
  int32_t rowSize;
  void *  row;
} SSdbRow;

// the single sdb instance of this process
static SSdbObject tsSdbObj = {0};

// Index implementation dispatch tables, indexed by SSdbTable.keyType.
// Each table has three entries: one string-hash function followed by two
// int-hash functions.
// NOTE(review): presumably the order is SDB_KEY_STRING, SDB_KEY_INT,
// SDB_KEY_AUTO — confirm against the ESdbKey definition.
static void *(*sdbInitIndexFp[])(int32_t maxRows, int32_t dataSize) = {sdbOpenStrHash, sdbOpenIntHash, sdbOpenIntHash};
static void *(*sdbAddIndexFp[])(void *handle, void *key, void *data) = {sdbAddStrHash, sdbAddIntHash, sdbAddIntHash};
static void  (*sdbDeleteIndexFp[])(void *handle, void *key) = {sdbDeleteStrHash, sdbDeleteIntHash, sdbDeleteIntHash};
static void *(*sdbGetIndexFp[])(void *handle, void *key) = {sdbGetStrHashData, sdbGetIntHashData, sdbGetIntHashData};
static void  (*sdbCleanUpIndexFp[])(void *handle) = {sdbCloseStrHash, sdbCloseIntHash, sdbCloseIntHash};
static void *(*sdbFetchRowFp[])(void *handle, void *ptr, void **ppRow) = {sdbFetchStrHashData, sdbFetchIntHashData, sdbFetchIntHashData};

// forward declaration: sdbWrite is handed to walRestore() and the sync module
// before its definition appears
static int sdbWrite(void *param, void *data, int type);

int32_t sdbGetId(void *handle) {
  return ((SSdbTable *)handle)->autoIndex;
}

int64_t sdbGetNumOfRows(void *handle) {
  return ((SSdbTable *)handle)->numOfRows;
}

/*
 * Returns the version of the last record accepted by sdbWrite().
 * The stored int64_t is converted to uint64_t on return.
 */
uint64_t sdbGetVersion() {
  uint64_t currentVersion = tsSdbObj.version;
  return currentVersion;
}

/* Tells whether this node currently holds the master sync role. */
bool sdbIsMaster() {
  bool isMaster = (tsSdbObj.role == TAOS_SYNC_ROLE_MASTER);
  return isMaster;
}

/*
 * Maps an ESdbAction value to a short name for log messages.
 * Any other value yields "invalid".
 */
static char *sdbGetActionStr(int32_t action) {
  if (action == SDB_ACTION_INSERT) return "insert";
  if (action == SDB_ACTION_DELETE) return "delete";
  if (action == SDB_ACTION_UPDATE) return "update";

  return "invalid";
}

/*
 * Renders the key found at the start of `row` as text for log messages.
 * String keys are returned as-is (the pointer into the row). Integer and
 * auto keys are read as an int32_t and formatted into a function-local
 * static buffer, so the result is overwritten by the next such call.
 * Unknown key types yield "invalid".
 */
static char *sdbGetkeyStr(SSdbTable *pTable, void *row) {
  static char keyBuf[16];

  if (pTable->keyType == SDB_KEY_STRING) {
    return (char *)row;
  }

  if (pTable->keyType == SDB_KEY_INT || pTable->keyType == SDB_KEY_AUTO) {
    sprintf(keyBuf, "%d", *(int32_t *)row);
    return keyBuf;
  }

  return "invalid";
}

static void *sdbGetTableFromId(int32_t tableId) {
  return tsSdbObj.tableList[tableId];
}

/*
 * Opens the sdb write-ahead log under tsMnodeDir and replays it through
 * sdbWrite(). Returns 0 on success and -1 when the log cannot be opened.
 * The result of walRestore() is not inspected.
 */
static int32_t sdbInitWal() {
  SWalCfg cfg = {.commitLog = 2, .wals = 2, .keep = 1};

  // sdbWrite() reads tsSdbObj.wal, so the handle is stored before the replay
  tsSdbObj.wal = walOpen(tsMnodeDir, &cfg);

  if (tsSdbObj.wal != NULL) {
    sdbTrace("open sdb wal for restore");
    walRestore(tsSdbObj.wal, NULL, sdbWrite);
    return 0;
  }

  sdbError("failed to open sdb wal in %s", tsMnodeDir);
  return -1;
}

/*
 * Runs after the WAL replay: invokes each open table's optional restoredFp
 * callback and logs per-table and overall row counts.
 */
static void sdbRestoreTables() {
  // row counts are int64_t per table, so the sum is kept in 64 bits as well
  int64_t totalRows = 0;
  int32_t numOfTables = 0;

  for (int32_t tableId = 0; tableId < SDB_TABLE_MAX; ++tableId) {
    SSdbTable *pTable = sdbGetTableFromId(tableId);
    if (pTable == NULL) continue;

    if (pTable->restoredFp) {
      (*pTable->restoredFp)();
    }

    totalRows += pTable->numOfRows;
    numOfTables++;
    // numOfRows is int64_t: it must be printed with a 64-bit conversion
    sdbTrace("table:%s, is restored, numOfRows:%" PRId64, pTable->tableName, pTable->numOfRows);
  }

  sdbTrace("sdb is restored, version:%" PRId64 " totalRows:%" PRId64 " numOfTables:%d", tsSdbObj.version, totalRows,
           numOfTables);
}

/*
 * Copies the roles reported by the sync module into the matching mnode
 * objects. Does nothing while sync has not been started. Entries whose
 * node id has no mnode object are skipped.
 */
void sdbUpdateMnodeRoles() {
  if (tsSdbObj.sync == NULL) {
    return;
  }

  SNodesRole nodeRoles = {0};
  syncGetNodesRole(tsSdbObj.sync, &nodeRoles);

  sdbPrint("update mnodes:%d sync roles", tsSdbObj.cfg.replica);

  for (int32_t idx = 0; idx < tsSdbObj.cfg.replica; ++idx) {
    SMnodeObj *pMnode = mgmtGetMnode(nodeRoles.nodeId[idx]);
    if (pMnode == NULL) {
      continue;
    }

    pMnode->role = nodeRoles.role[idx];
    sdbPrint("mnode:%d, role:%s", pMnode->mnodeId, mgmtGetMnodeRoleStr(pMnode->role));
    // balances the lookup by mgmtGetMnode() above
    mgmtReleaseMnode(pMnode);
  }
}

/*
 * Callback registered as syncInfo.getFileInfo in sdbUpdateSync().
 * It only refreshes the mnode roles; none of the output parameters is
 * written and the return value is always 0.
 */
static uint32_t sdbGetFileInfo(void *ahandle, char *name, uint32_t *index, int32_t *size) {
  (void)ahandle;
  (void)name;
  (void)index;
  (void)size;

  sdbUpdateMnodeRoles();

  return 0;
}

/*
 * Callback registered as syncInfo.getWalInfo in sdbUpdateSync().
 * Writes the fixed name "wal0" (including its terminator) into `name` and
 * returns 0. `ahandle` and `index` are not used; `*index` is left untouched.
 */
static int sdbGetWalInfo(void *ahandle, char *name, uint32_t *index) {
  static const char walName[] = "wal0";

  (void)ahandle;
  (void)index;

  strcpy(name, walName);
  return 0;
}

/*
 * Callback registered as syncInfo.notifyRole in sdbUpdateSync().
 * Logs the transition, calls balanceReset() when this node changes from a
 * non-master role to master, stores the new role and then refreshes the
 * role of every mnode object.
 */
static void sdbNotifyRole(void *ahandle, int8_t role) {
  (void)ahandle;

  sdbPrint("mnode role changed from %s to %s", mgmtGetMnodeRoleStr(tsSdbObj.role), mgmtGetMnodeRoleStr(role));

  // evaluated against the old role, before it is overwritten below
  bool becomesMaster = (role == TAOS_SYNC_ROLE_MASTER) && (tsSdbObj.role != TAOS_SYNC_ROLE_MASTER);
  if (becomesMaster) {
    balanceReset();
  }

  tsSdbObj.role = role;
  sdbUpdateMnodeRoles();
}

/*
 * Callback registered as syncInfo.confirmForward in sdbUpdateSync().
 * Stores the result code in tsSdbObj.code and posts the semaphore that
 * sdbForwardToPeer() waits on.
 */
static void sdbConfirmForward(void *ahandle, void *param, int32_t code) {
  (void)ahandle;
  (void)param;

  // the code must be published before the waiter is released
  tsSdbObj.code = code;
  sdbTrace("sdb forward request confirmed, result:%s", tstrerror(code));

  sem_post(&tsSdbObj.sem);
}

/*
 * Forwards a WAL record to the peers through the sync module.
 * Returns TSDB_CODE_SUCCESS when sync is not running. When
 * syncForwardToPeer() returns a positive value, the call blocks on
 * tsSdbObj.sem until sdbConfirmForward() posts it and then returns the code
 * recorded there; otherwise the value from syncForwardToPeer() is returned
 * unchanged.
 */
static int32_t sdbForwardToPeer(void *pHead) {
  if (tsSdbObj.sync == NULL) {
    return TSDB_CODE_SUCCESS;
  }

  int32_t forwardCode = syncForwardToPeer(tsSdbObj.sync, pHead, NULL);
  if (forwardCode <= 0) {
    return forwardCode;
  }

  sem_wait(&tsSdbObj.sem);
  return tsSdbObj.code;
}

void sdbUpdateSync() {
  SSyncCfg syncCfg = {0};
  int32_t index = 0;

  SDMMnodeInfos *mnodes = dnodeGetMnodeInfos();
  for (int32_t i = 0; i < mnodes->nodeNum; ++i) {
    SDMMnodeInfo *node = &mnodes->nodeInfos[i];
    syncCfg.nodeInfo[i].nodeId = node->nodeId;
    taosGetFqdnPortFromEp(node->nodeEp, syncCfg.nodeInfo[i].nodeFqdn, &syncCfg.nodeInfo[i].nodePort);
    syncCfg.nodeInfo[i].nodePort += TSDB_PORT_SYNC;
    index++;
  }

  if (index == 0) {
    void *pNode = NULL;
    while (1) {
      SMnodeObj *pMnode = NULL;
      pNode = mgmtGetNextMnode(pNode, &pMnode);
      if (pMnode == NULL) break;

      syncCfg.nodeInfo[index].nodeId = pMnode->mnodeId;
      syncCfg.nodeInfo[index].nodePort = pMnode->pDnode->dnodePort + TSDB_PORT_SYNC;
      strcpy(syncCfg.nodeInfo[index].nodeFqdn, pMnode->pDnode->dnodeEp);
      index++;

      mgmtReleaseMnode(pMnode);
    }
  }

  syncCfg.replica = index;
  syncCfg.arbitratorPort = syncCfg.nodeInfo[0].nodePort;
  strcpy(syncCfg.arbitratorFqdn, syncCfg.nodeInfo[0].nodeFqdn);
  if (syncCfg.replica == 1) {
    syncCfg.quorum = 1;
  } else {
    syncCfg.quorum = 2;
  }

  bool hasThisDnode = false;
  for (int32_t i = 0; i < syncCfg.replica; ++i) {
    if (syncCfg.nodeInfo[i].nodeId == dnodeGetDnodeId()) {
      hasThisDnode = true;
      break;
    }
  }

  if (!hasThisDnode) return;
  if (memcmp(&syncCfg, &tsSdbObj.cfg, sizeof(SSyncCfg)) == 0) return;

  sdbPrint("work as mnode, replica:%d arbitrator:%s", syncCfg.replica, syncCfg.arbitratorFqdn);
  for (int32_t i = 0; i < syncCfg.replica; ++i) {
    sdbPrint("mnode:%d, ip:%s", syncCfg.nodeInfo[i].nodeId, syncCfg.nodeInfo[i].nodeFqdn);
  }

  SSyncInfo syncInfo;
  syncInfo.vgId = 1;
  syncInfo.version = sdbGetVersion();
  syncInfo.syncCfg = syncCfg;
  sprintf(syncInfo.path, "%s/", tsMnodeDir);
  syncInfo.ahandle = NULL;
  syncInfo.getWalInfo = sdbGetWalInfo;
  syncInfo.getFileInfo = sdbGetFileInfo;
  syncInfo.writeToCache = sdbWrite;
  syncInfo.confirmForward = sdbConfirmForward; 
  syncInfo.notifyRole = sdbNotifyRole;
  tsSdbObj.cfg = syncCfg;

  if (tsSdbObj.sync) {
    syncReconfig(tsSdbObj.sync, &syncCfg);
  } else {
    tsSdbObj.sync = syncStart(&syncInfo);
  }
}

/*
 * Brings the sdb up: initializes the global mutex and semaphore, replays the
 * WAL, lets the tables finish their restore, configures sync and finally
 * marks the sdb as serving.
 * Returns TSDB_CODE_SUCCESS, or -1 when the WAL cannot be opened.
 */
int32_t sdbInit() {
  pthread_mutex_init(&tsSdbObj.mutex, NULL);
  sem_init(&tsSdbObj.sem, 0, 0);

  int32_t walCode = sdbInitWal();
  if (walCode != 0) return -1;

  sdbRestoreTables();

  // with exactly one mnode the master role is taken right away
  bool singleMnode = (mgmtGetMnodesNum() == 1);
  if (singleMnode) {
    tsSdbObj.role = TAOS_SYNC_ROLE_MASTER;
  }

  sdbUpdateSync();
  tsSdbObj.status = SDB_STATUS_SERVING;

  return TSDB_CODE_SUCCESS;
}

/*
 * Shuts the sdb down: stops sync, closes the WAL, destroys the global
 * semaphore and mutex and resets tsSdbObj to all zeroes (which also clears
 * tableList and returns the status to SDB_STATUS_OFFLINE).
 * Does nothing unless the sdb is in SDB_STATUS_SERVING.
 */
void sdbCleanUp() {
  if (tsSdbObj.status != SDB_STATUS_SERVING) return;

  // sdbUpdateSync() leaves sync NULL when this dnode is not part of the
  // mnode set, so the handle must be checked like everywhere else in this file
  if (tsSdbObj.sync != NULL) {
    syncStop(tsSdbObj.sync);
    // NOTE(review): confirm that syncStop() does not already release the
    // handle; if it does, this free() is a double free
    free(tsSdbObj.sync);
  }

  walClose(tsSdbObj.wal);
  sem_destroy(&tsSdbObj.sem);
  pthread_mutex_destroy(&tsSdbObj.mutex);
  memset(&tsSdbObj, 0, sizeof(tsSdbObj));
}

/*
 * Atomically increments the reference counter embedded in a row object.
 * The counter is the int32_t located refCountPos bytes into the row.
 * A NULL row is ignored.
 */
void sdbIncRef(void *handle, void *pRow) {
  if (pRow == NULL) return;

  SSdbTable *pTable = handle;
  char *     pBase = pRow;
  int32_t *  pRefCount = (int32_t *)(pBase + pTable->refCountPos);

  atomic_add_fetch_32(pRefCount, 1);

  // debugging aid, switched off by the constant 0
  if (0 && strcmp(pTable->tableName, "accounts") == 0) {
    sdbTrace("table:%s, add ref to record:%s:%s:%d", pTable->tableName, pTable->tableName, sdbGetkeyStr(pTable, pRow),
             *pRefCount);
  }
}

/*
 * Atomically decrements the reference counter embedded in a row object and
 * destroys the row through destroyFp once the counter is no longer positive
 * AND the flag byte right before the counter is set (sdbDeleteHash() sets
 * it when the row leaves the index). A NULL row is ignored.
 */
void sdbDecRef(void *handle, void *pRow) {
  if (pRow == NULL) return;

  SSdbTable *pTable = handle;
  char *     pBase = pRow;
  int32_t *  pRefCount = (int32_t *)(pBase + pTable->refCountPos);
  int32_t    remaining = atomic_sub_fetch_32(pRefCount, 1);

  // debugging aid, switched off by the constant 0
  if (0 && strcmp(pTable->tableName, "accounts") == 0) {
    sdbTrace("table:%s, def ref of record:%s:%s:%d", pTable->tableName, pTable->tableName, sdbGetkeyStr(pTable, pRow),
             *pRefCount);
  }

  // still referenced, or still present in the index: keep the row
  int8_t *pRemoved = (int8_t *)(pBase + pTable->refCountPos - 1);
  if (remaining > 0 || !*pRemoved) return;

  sdbTrace("table:%s, record:%s:%s:%d is destroyed", pTable->tableName, pTable->tableName,
           sdbGetkeyStr(pTable, pRow), *pRefCount);

  SSdbOper oper = {.pObj = pRow};
  (*pTable->destroyFp)(&oper);
}

/*
 * Looks up the index entry for `key` without taking the table mutex and
 * without touching the row's reference counter.
 * Returns NULL for a NULL handle or when the index has no such key.
 */
static SSdbRow *sdbGetRowMeta(void *handle, void *key) {
  if (handle == NULL) {
    return NULL;
  }

  SSdbTable *pTable = handle;
  return (*sdbGetIndexFp[pTable->keyType])(pTable->iHandle, key);
}

/*
 * Looks up a row by key and takes a reference on it.
 * Returns the row object, or NULL for a NULL handle or an unknown key.
 * A non-NULL result must later be released with sdbDecRef().
 */
void *sdbGetRow(void *handle, void *key) {
  SSdbTable *pTable = (SSdbTable *)handle;
  if (pTable == NULL) return NULL;

  void *pRow = NULL;

  pthread_mutex_lock(&pTable->mutex);
  SSdbRow *pMeta = (*sdbGetIndexFp[pTable->keyType])(pTable->iHandle, key);
  if (pMeta != NULL) {
    // the index entry is only read while the mutex is held; once it is
    // released, a concurrent delete may remove the entry
    pRow = pMeta->row;
    sdbIncRef(pTable, pRow);
  }
  pthread_mutex_unlock(&pTable->mutex);

  return pRow;
}

/*
 * Adds pOper->pObj to the table index, takes the index's reference on the
 * row, updates the row and autoIndex counters and then notifies the table
 * owner through insertFp. Always returns TSDB_CODE_SUCCESS; the results of
 * the index insertion and of insertFp are not inspected.
 */
static int32_t sdbInsertHash(SSdbTable *pTable, SSdbOper *pOper) {
  SSdbRow rowMeta;
  rowMeta.rowSize = pOper->rowSize;
  rowMeta.row = pOper->pObj;

  pthread_mutex_lock(&pTable->mutex);
  (*sdbAddIndexFp[pTable->keyType])(pTable->iHandle, pOper->pObj, &rowMeta);
  sdbIncRef(pTable, pOper->pObj);
  pTable->numOfRows++;

  if (pTable->keyType == SDB_KEY_AUTO) {
    // the key of an auto table is the 32-bit integer at the start of the row;
    // remember the highest one seen so that new keys continue after it
    pTable->autoIndex = MAX(pTable->autoIndex, *((uint32_t *)pOper->pObj));
  } else {
    pTable->autoIndex++;
  }

  // snapshot for the log line, taken while the counter is still protected
  int64_t numOfRows = pTable->numOfRows;
  pthread_mutex_unlock(&pTable->mutex);

  // numOfRows is int64_t: it must be printed with a 64-bit conversion
  sdbTrace("table:%s, insert record:%s to hash, numOfRows:%" PRId64 " version:%" PRIu64, pTable->tableName,
           sdbGetkeyStr(pTable, pOper->pObj), numOfRows, sdbGetVersion());

  (*pTable->insertFp)(pOper);
  return TSDB_CODE_SUCCESS;
}

/*
 * Notifies the table owner through deleteFp, removes pOper->pObj from the
 * index, marks the row as removed and drops the index's reference on it.
 * The row is destroyed here if nobody else still holds a reference
 * (see sdbDecRef()). Always returns TSDB_CODE_SUCCESS.
 */
static int32_t sdbDeleteHash(SSdbTable *pTable, SSdbOper *pOper) {
  (*pTable->deleteFp)(pOper);

  pthread_mutex_lock(&pTable->mutex);
  (*sdbDeleteIndexFp[pTable->keyType])(pTable->iHandle, pOper->pObj);
  pTable->numOfRows--;
  // snapshot for the log line, taken while the counter is still protected
  int64_t numOfRows = pTable->numOfRows;
  pthread_mutex_unlock(&pTable->mutex);

  // numOfRows is int64_t: it must be printed with a 64-bit conversion
  sdbTrace("table:%s, delete record:%s from hash, numOfRows:%" PRId64 " version:%" PRIu64, pTable->tableName,
           sdbGetkeyStr(pTable, pOper->pObj), numOfRows, sdbGetVersion());

  // the byte right before the reference counter tells sdbDecRef() that the
  // row has left the index and may be destroyed with the last reference
  int8_t *updateEnd = (int8_t *)((char *)pOper->pObj + pTable->refCountPos - 1);
  *updateEnd = 1;
  sdbDecRef(pTable, pOper->pObj);

  return TSDB_CODE_SUCCESS;
}

/*
 * Applies an update by handing the operation to the owner's updateFp; the
 * index itself is not modified here. Always returns TSDB_CODE_SUCCESS; the
 * result of updateFp is not inspected.
 */
static int32_t sdbUpdateHash(SSdbTable *pTable, SSdbOper *pOper) {
  // numOfRows is int64_t: it must be printed with a 64-bit conversion
  sdbTrace("table:%s, update record:%s in hash, numOfRows:%" PRId64 " version:%" PRIu64, pTable->tableName,
           sdbGetkeyStr(pTable, pOper->pObj), pTable->numOfRows, sdbGetVersion());

  (*pTable->updateFp)(pOper);
  return TSDB_CODE_SUCCESS;
}

/*
 * Central write path: versions a record, appends it to the WAL, forwards it
 * to the peers and, for records that did not originate from a local row
 * call, applies it to the in-memory table.
 *
 * param  non-NULL when called from sdbInsertRow()/sdbDeleteRow()/
 *        sdbUpdateRow(); those callers apply the change to the index
 *        themselves, so this function returns right after the WAL write.
 *        NULL for records replayed from the WAL or received from a peer.
 * data   SWalHead followed by the record content; msgType encodes
 *        `tableId * 10 + action`.
 * type   not used.
 *
 * Version handling: a record with version 0 gets the next version assigned.
 * A record whose version is not newer than the current one is treated as
 * already applied and TSDB_CODE_SUCCESS is returned. A record that skips
 * ahead of `current + 1` is rejected with TSDB_CODE_OTHERS.
 *
 * Returns the walWrite() result for local calls, otherwise the result of
 * applying the record, a decode error, or TSDB_CODE_INVALID_MSG_TYPE for an
 * unknown action.
 */
static int sdbWrite(void *param, void *data, int type) {
  SWalHead *pHead = data;
  int32_t   tableId = pHead->msgType / 10;
  int32_t   action = pHead->msgType % 10;

  SSdbTable *pTable = sdbGetTableFromId(tableId);
  assert(pTable != NULL);

  pthread_mutex_lock(&tsSdbObj.mutex);
  if (pHead->version == 0) {
     // assign version
    tsSdbObj.version++;
    pHead->version = tsSdbObj.version;
  } else {
    // for data from WAL or forward, version may be smaller
    if (pHead->version <= tsSdbObj.version) {
      pthread_mutex_unlock(&tsSdbObj.mutex);
      return TSDB_CODE_SUCCESS;
    } else if (pHead->version != tsSdbObj.version + 1) {
      pthread_mutex_unlock(&tsSdbObj.mutex);
      sdbError("table:%s, failed to restore %s record:%s from wal, version:%" PRId64 " too large, sdb version:%" PRId64,
               pTable->tableName, sdbGetActionStr(action), sdbGetkeyStr(pTable, pHead->cont), pHead->version,
               tsSdbObj.version);
      return TSDB_CODE_OTHERS;
    } else {
      tsSdbObj.version = pHead->version;
    }
  }

  int32_t code = walWrite(tsSdbObj.wal, pHead);
  if (code < 0) {
    pthread_mutex_unlock(&tsSdbObj.mutex);
    return code;
  }
  walFsync(tsSdbObj.wal);

  // NOTE(review): the result of the forward is not inspected
  sdbForwardToPeer(pHead);
  pthread_mutex_unlock(&tsSdbObj.mutex);

  // from app, oper is created
  if (param != NULL) return code;

  // from wal or forward msg, should create oper
  if (tsSdbObj.sync != NULL) {
    syncConfirmForward(tsSdbObj.sync, pHead->version, code);
  }

  if (action == SDB_ACTION_INSERT) {
    SSdbOper oper = {.rowSize = pHead->len, .rowData = pHead->cont, .table = pTable};
    code = (*pTable->decodeFp)(&oper);
    // without a decoded object there is nothing valid to put into the index
    // NOTE(review): assumes decodeFp reports success as TSDB_CODE_SUCCESS — confirm
    if (code != TSDB_CODE_SUCCESS) {
      sdbError("table:%s, failed to decode %s record", pTable->tableName, sdbGetActionStr(action));
      return code;
    }
    return sdbInsertHash(pTable, &oper);
  } else if (action == SDB_ACTION_DELETE) {
    SSdbRow *rowMeta = sdbGetRowMeta(pTable, pHead->cont);
    assert(rowMeta != NULL && rowMeta->row != NULL);
    SSdbOper oper = {.table = pTable, .pObj = rowMeta->row};
    return sdbDeleteHash(pTable, &oper);
  } else if (action == SDB_ACTION_UPDATE) {
    SSdbRow *rowMeta = sdbGetRowMeta(pTable, pHead->cont);
    assert(rowMeta != NULL && rowMeta->row != NULL);
    SSdbOper oper = {.rowSize = pHead->len, .rowData = pHead->cont, .table = pTable};
    code = (*pTable->decodeFp)(&oper);
    // same as for inserts: do not hand an undecoded object to updateFp
    if (code != TSDB_CODE_SUCCESS) {
      sdbError("table:%s, failed to decode %s record", pTable->tableName, sdbGetActionStr(action));
      return code;
    }
    return sdbUpdateHash(pTable, &oper);
  } else { return TSDB_CODE_INVALID_MSG_TYPE; }
}

/*
 * Inserts a new row.
 *
 * For SDB_KEY_AUTO tables the key (the 32-bit integer at the start of the
 * row object) is assigned here from the table's autoIndex. For
 * SDB_OPER_GLOBAL operations the row is encoded and passed through
 * sdbWrite() (WAL + peers) before it is added to the index.
 *
 * Returns -1 when the operation has no table or the WAL buffer cannot be
 * allocated, TSDB_CODE_ALREADY_THERE when a row with the same key exists,
 * a negative sdbWrite() result, or the result of sdbInsertHash().
 */
int32_t sdbInsertRow(SSdbOper *pOper) {
  SSdbTable *pTable = (SSdbTable *)pOper->table;
  if (pTable == NULL) return -1;

  void *pExisting = sdbGetRow(pTable, pOper->pObj);
  if (pExisting != NULL) {
    sdbError("table:%s, failed to insert record:%s, already exist", pTable->tableName, sdbGetkeyStr(pTable, pOper->pObj));
    // sdbGetRow() took its reference on the row found in the index, so that
    // is the row to release, not the object the caller tried to insert
    sdbDecRef(pTable, pExisting);
    return TSDB_CODE_ALREADY_THERE;
  }

  if (pTable->keyType == SDB_KEY_AUTO) {
    pthread_mutex_lock(&pTable->mutex);
    *((uint32_t *)pOper->pObj) = ++pTable->autoIndex;

    // let vgId increase from 2
    if (pTable->autoIndex == 1 && strcmp(pTable->tableName, "vgroups") == 0) {
      *((uint32_t *)pOper->pObj) = ++pTable->autoIndex;
    }
    pthread_mutex_unlock(&pTable->mutex);
  }

  if (pOper->type == SDB_OPER_GLOBAL) {
    int32_t   size = sizeof(SWalHead) + pTable->maxRowSize;
    SWalHead *pHead = taosAllocateQitem(size);
    if (pHead == NULL) return -1;

    pHead->version = 0;
    pHead->msgType = pTable->tableId * 10 + SDB_ACTION_INSERT;

    // encodeFp serializes the row into the WAL buffer; the record length is
    // taken from pOper->rowSize afterwards
    pOper->rowData = pHead->cont;
    (*pTable->encodeFp)(pOper);
    pHead->len = pOper->rowSize;

    int32_t code = sdbWrite(pOper, pHead, pHead->msgType);
    taosFreeQitem(pHead);
    if (code < 0) return code;
  }

  return sdbInsertHash(pTable, pOper);
}

/*
 * Deletes the row identified by pOper->pObj.
 *
 * For SDB_OPER_GLOBAL operations a delete record that carries only the key
 * is passed through sdbWrite() (WAL + peers) before the row is removed from
 * the index.
 *
 * Returns -1 when the operation has no table, the row is not in the index,
 * the key type is unknown or the WAL buffer cannot be allocated; a negative
 * sdbWrite() result; or the result of sdbDeleteHash().
 */
int32_t sdbDeleteRow(SSdbOper *pOper) {
  SSdbTable *pTable = (SSdbTable *)pOper->table;
  if (pTable == NULL) return -1;

  SSdbRow *pMeta = sdbGetRowMeta(pTable, pOper->pObj);
  if (pMeta == NULL) {
    sdbTrace("table:%s, record is not there, delete failed", pTable->tableName);
    return -1;
  }

  void * pMetaRow = pMeta->row;
  assert(pMetaRow != NULL);

  if (pOper->type == SDB_OPER_GLOBAL) {
    int32_t rowSize = 0;
    switch (pTable->keyType) {
      case SDB_KEY_STRING:
        rowSize = strlen((char *)pOper->pObj) + 1;
        break;
      case SDB_KEY_INT:
      case SDB_KEY_AUTO:
        // NOTE(review): other code in this file reads integer keys as 32-bit
        // values, yet 8 bytes are logged here — confirm this is intended
        rowSize = sizeof(uint64_t);
        break;
      default:
        return -1;
    }

    int32_t   size = sizeof(SWalHead) + rowSize;
    SWalHead *pHead = taosAllocateQitem(size);
    if (pHead == NULL) return -1;

    pHead->version = 0;
    pHead->len = rowSize;
    pHead->msgType = pTable->tableId * 10 + SDB_ACTION_DELETE;
    // the key sits at the start of the row object
    memcpy(pHead->cont, pOper->pObj, rowSize);

    int32_t code = sdbWrite(pOper, pHead, pHead->msgType);
    taosFreeQitem(pHead);
    if (code < 0) return code;
  }

  return sdbDeleteHash(pTable, pOper);
}

/*
 * Updates the row identified by pOper->pObj.
 *
 * For SDB_OPER_GLOBAL operations the row is encoded and passed through
 * sdbWrite() (WAL + peers) before the owner's updateFp is invoked.
 *
 * Returns -1 when the operation has no table, the row is not in the index
 * or the WAL buffer cannot be allocated; a negative sdbWrite() result; or
 * the result of sdbUpdateHash().
 */
int32_t sdbUpdateRow(SSdbOper *pOper) {
  SSdbTable *pTable = (SSdbTable *)pOper->table;
  if (pTable == NULL) return -1;

  SSdbRow *pMeta = sdbGetRowMeta(pTable, pOper->pObj);
  if (pMeta == NULL) {
    sdbTrace("table:%s, record is not there, update failed", pTable->tableName);
    return -1;
  }

  void * pMetaRow = pMeta->row;
  assert(pMetaRow != NULL);

  if (pOper->type == SDB_OPER_GLOBAL) {
    int32_t   size = sizeof(SWalHead) + pTable->maxRowSize;
    SWalHead *pHead = taosAllocateQitem(size);
    if (pHead == NULL) return -1;

    pHead->version = 0;
    pHead->msgType = pTable->tableId * 10 + SDB_ACTION_UPDATE;

    // encodeFp serializes the row into the WAL buffer; the record length is
    // taken from pOper->rowSize afterwards
    pOper->rowData = pHead->cont;
    (*pTable->encodeFp)(pOper);
    pHead->len = pOper->rowSize;

    int32_t code = sdbWrite(pOper, pHead, pHead->msgType);
    taosFreeQitem(pHead);
    if (code < 0) return code;
  }

  return sdbUpdateHash(pTable, pOper);
}

/*
 * Iterates over the rows of a table.
 *
 * handle  table to iterate; NULL yields no row.
 * pNode   iterator position returned by the previous call; the first call
 *         in this file's own loops passes NULL.
 * ppRow   receives the next row with a reference taken on it, or NULL when
 *         there is no further row. A returned row must be released with
 *         sdbDecRef().
 *
 * Returns the iterator position for the next call, or NULL when the table
 * is NULL or no row was produced.
 */
void *sdbFetchRow(void *handle, void *pNode, void **ppRow) {
  SSdbTable *pTable = (SSdbTable *)handle;
  // initialized so the check below never reads an indeterminate pointer,
  // whether or not the index writes its output at the end of the iteration
  SSdbRow *  pMeta = NULL;

  *ppRow = NULL;
  if (pTable == NULL) return NULL;

  pNode = (*sdbFetchRowFp[pTable->keyType])(pTable->iHandle, pNode, (void **)&pMeta);
  if (pMeta == NULL) return NULL;

  *ppRow = pMeta->row;
  sdbIncRef(handle, pMeta->row);

  return pNode;
}

/*
 * Creates a table from its descriptor, builds its index and registers it in
 * tsSdbObj.tableList under pDesc->tableId.
 *
 * Returns the table handle, or NULL when the table id or key type cannot be
 * used as an index into the respective arrays, or when memory allocation
 * fails. A table name longer than the tableName buffer is truncated.
 */
void *sdbOpenTable(SSdbTableDesc *pDesc) {
  // tableId indexes tsSdbObj.tableList, keyType indexes the sdb*IndexFp tables
  int32_t tableId = (int32_t)pDesc->tableId;
  int32_t keyType = (int32_t)pDesc->keyType;
  int32_t numOfKeyTypes = (int32_t)(sizeof(sdbInitIndexFp) / sizeof(sdbInitIndexFp[0]));
  if (tableId < 0 || tableId >= SDB_TABLE_MAX) return NULL;
  if (keyType < 0 || keyType >= numOfKeyTypes) return NULL;

  SSdbTable *pTable = (SSdbTable *)calloc(1, sizeof(SSdbTable));

  if (pTable == NULL) return NULL;

  // bounded copy: always terminated, never writes past tableName
  snprintf(pTable->tableName, sizeof(pTable->tableName), "%s", pDesc->tableName);
  pTable->keyType      = pDesc->keyType;
  pTable->tableId      = pDesc->tableId;
  pTable->hashSessions = pDesc->hashSessions;
  pTable->maxRowSize   = pDesc->maxRowSize;
  pTable->refCountPos  = pDesc->refCountPos;
  pTable->insertFp     = pDesc->insertFp;
  pTable->deleteFp     = pDesc->deleteFp;
  pTable->updateFp     = pDesc->updateFp;
  pTable->encodeFp     = pDesc->encodeFp;
  pTable->decodeFp     = pDesc->decodeFp;
  pTable->destroyFp    = pDesc->destroyFp;
  pTable->restoredFp   = pDesc->restoredFp;

  if (sdbInitIndexFp[pTable->keyType] != NULL) {
    // NOTE(review): maxRowSize is passed where the constructor's parameter is
    // named maxRows, and hashSessions is never used — confirm this is intended
    pTable->iHandle = (*sdbInitIndexFp[pTable->keyType])(pTable->maxRowSize, sizeof(SSdbRow));
  }

  pthread_mutex_init(&pTable->mutex, NULL);

  tsSdbObj.numOfTables++;
  tsSdbObj.tableList[pTable->tableId] = pTable;
  return pTable;
}

/*
 * Unregisters a table, destroys every row still in its index through
 * destroyFp (regardless of the rows' reference counters), releases the
 * index and frees the table. A NULL handle is ignored.
 */
void sdbCloseTable(void *handle) {
  SSdbTable *pTable = (SSdbTable *)handle;
  if (pTable == NULL) return;

  tsSdbObj.numOfTables--;
  tsSdbObj.tableList[pTable->tableId] = NULL;

  void *pNode = NULL;
  while (1) {
    // reset on every pass so the end-of-iteration check below never reads an
    // indeterminate pointer
    SSdbRow *pMeta = NULL;
    pNode = (*sdbFetchRowFp[pTable->keyType])(pTable->iHandle, pNode, (void **)&pMeta);
    if (pMeta == NULL) break;

    SSdbOper oper = {
      .pObj = pMeta->row,
      .table = pTable,
    };

    (*pTable->destroyFp)(&oper);
  }

  if (sdbCleanUpIndexFp[pTable->keyType]) {
    (*sdbCleanUpIndexFp[pTable->keyType])(pTable->iHandle);
  }

  pthread_mutex_destroy(&pTable->mutex);

  sdbTrace("table:%s, is closed, numOfTables:%d", pTable->tableName, tsSdbObj.numOfTables);
  free(pTable);
}

