/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
Hongze Cheng 已提交
16
#include "tsdb.h"
H
Hongze Cheng 已提交
17

H
Hongze Cheng 已提交
18
// Capacity of the on-stack `pos[]` arrays that record one skip-list position
// per level (see tsdbTbDataIterOpen and tsdbInsertTableDataImpl).
// NOTE(review): the per-table maxLevel comes from the vnode config (slLevel);
// presumably it is validated elsewhere to never exceed this value -- confirm.
#define SL_MAX_LEVEL 5
H
Hongze Cheng 已提交
19

H
Hongze Cheng 已提交
20 21 22 23
// Skip-list node layout helpers.
//
// A node of level `l` owns 2*l link slots in its trailing `forwards[]` array:
//   forwards[0 .. l-1]    forward links, one per level
//   forwards[l .. 2*l-1]  backward links, one per level
// The row payload is stored immediately after the last link slot.
//
// SL_NODE_SIZE(l)        bytes needed for the node header plus 2*l link slots
//                        (the payload size is NOT included)
// SL_NODE_FORWARD(n, l)  lvalue of node n's forward link at level l
// SL_NODE_BACKWARD(n, l) lvalue of node n's backward link at level l
// SL_NODE_DATA(n)        address of the first byte after the link slots,
//                        i.e. where the payload begins
#define SL_NODE_SIZE(l)        (sizeof(SMemSkipListNode) + sizeof(SMemSkipListNode *) * (l)*2)
#define SL_NODE_FORWARD(n, l)  ((n)->forwards[l])
#define SL_NODE_BACKWARD(n, l) ((n)->forwards[(n)->level + (l)])
#define SL_NODE_DATA(n)        (&SL_NODE_BACKWARD(n, (n)->level))
H
Hongze Cheng 已提交
24

H
Hongze Cheng 已提交
25 26 27 28 29 30 31 32 33 34 35
// Flags accepted by tbDataMovePosTo():
//   SL_MOVE_BACKWARD  search from the tail sentinel towards the head
//                     (without it the search runs from the head towards the tail)
//   SL_MOVE_FROM_POS  resume from the position already stored in pos[] for the
//                     highest active level instead of restarting at the sentinel
#define SL_MOVE_BACKWARD 0x1
#define SL_MOVE_FROM_POS 0x2

// Positions pos[] (one entry per level) next to pKey inside pTbData's skip list.
static void    tbDataMovePosTo(STbData *pTbData, SMemSkipListNode **pos, TSDBKEY *pKey, int32_t flags);
// Looks up the (suid, uid) entry in the memtable, creating it when absent.
static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData);
// Inserts every row of one submit block into pTbData and updates statistics.
static int32_t tsdbInsertTableDataImpl(SMemTable *pMemTable, STbData *pTbData, int64_t version,
                                       SSubmitMsgIter *pMsgIter, SSubmitBlk *pBlock, SSubmitBlkRsp *pRsp);

/**
 * Allocate and initialize an empty memtable for pTsdb.
 *
 * The new memtable starts with a reference count of 1, binds to the vnode's
 * currently in-use buffer pool and takes a reference on that pool.
 *
 * @param pTsdb       owning tsdb instance
 * @param ppMemTable  receives the new memtable, or NULL on failure
 * @return 0 on success, TSDB_CODE_OUT_OF_MEMORY if an allocation fails
 */
int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable) {
  SMemTable *pNew = (SMemTable *)taosMemoryCalloc(1, sizeof(*pNew));
  if (pNew == NULL) {
    *ppMemTable = NULL;
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  taosInitRWLatch(&pNew->latch);
  pNew->pTsdb = pTsdb;
  pNew->pPool = pTsdb->pVnode->inUse;
  pNew->nRef = 1;

  // Key/version ranges start inverted so the first TMIN/TMAX update sets both ends.
  pNew->minKey = TSKEY_MAX;
  pNew->maxKey = TSKEY_MIN;
  pNew->minVersion = VERSION_MAX;
  pNew->maxVersion = VERSION_MIN;
  pNew->nRow = 0;
  pNew->nDel = 0;

  pNew->aTbData = taosArrayInit(128, sizeof(STbData *));
  if (pNew->aTbData == NULL) {
    taosMemoryFree(pNew);
    *ppMemTable = NULL;
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // Only pin the buffer pool once nothing else can fail.
  vnodeBufPoolRef(pNew->pPool);

  *ppMemTable = pNew;
  return 0;
}

/**
 * Release a memtable: drop its buffer-pool reference, destroy the per-table
 * index array and free the memtable itself. A NULL argument is ignored.
 */
void tsdbMemTableDestroy(SMemTable *pMemTable) {
  if (pMemTable == NULL) {
    return;
  }

  vnodeBufPoolUnRef(pMemTable->pPool);
  taosArrayDestroy(pMemTable->aTbData);
  taosMemoryFree(pMemTable);
}

static int32_t tbDataPCmprFn(const void *p1, const void *p2) {
  STbData *pTbData1 = *(STbData **)p1;
  STbData *pTbData2 = *(STbData **)p2;

  if (pTbData1->suid < pTbData2->suid) {
H
Hongze Cheng 已提交
81
    return -1;
H
Hongze Cheng 已提交
82 83 84 85 86 87 88 89
  } else if (pTbData1->suid > pTbData2->suid) {
    return 1;
  }

  if (pTbData1->uid < pTbData2->uid) {
    return -1;
  } else if (pTbData1->uid > pTbData2->uid) {
    return 1;
H
Hongze Cheng 已提交
90
  }
H
Hongze Cheng 已提交
91

H
Hongze Cheng 已提交
92
  return 0;
H
Hongze Cheng 已提交
93
}
H
Hongze Cheng 已提交
94 95
/**
 * Look up the per-table data of (suid, uid) in a memtable.
 *
 * @param pMemTable  memtable to search
 * @param suid       super-table uid of the table
 * @param uid        uid of the table
 * @param ppTbData   receives the matching entry, or NULL when none exists
 */
void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData) {
  STbData *pKey = &(STbData){.suid = suid, .uid = uid};
  STbData *pFound = NULL;

  taosRLockLatch(&pMemTable->latch);
  void *p = taosArraySearch(pMemTable->aTbData, &pKey, tbDataPCmprFn, TD_EQ);
  if (p != NULL) {
    // `p` points into the array's own storage, which a concurrent insert
    // (done under the write latch) may shift or reallocate. Copy the element
    // out while the read latch is still held.
    pFound = *(STbData **)p;
  }
  taosRUnLockLatch(&pMemTable->latch);

  *ppTbData = pFound;
}
H
Hongze Cheng 已提交
103

H
Hongze Cheng 已提交
104 105 106 107 108 109 110 111 112 113 114
/**
 * Insert one submit block into the active memtable of pTsdb.
 *
 * The target table is first resolved through the meta reader: its name is
 * appended to pRsp->tblFName (when that buffer is set) and the schema version
 * of the table -- or of its super table for non-normal tables -- is reported
 * in pRsp->sver. The rows are then inserted into the table's skip list.
 *
 * @param pTsdb     tsdb instance whose `mem` memtable receives the rows
 * @param version   version assigned to every inserted row
 * @param pMsgIter  submit-message iterator; supplies suid/uid of the table
 * @param pBlock    submit block holding the rows
 * @param pRsp      per-block response that is filled in
 * @return 0 on success; TSDB_CODE_PAR_TABLE_NOT_EXIST if the table (or its
 *         super table) cannot be found in meta; otherwise the error returned
 *         by the lookup/insert helpers
 */
int32_t tsdbInsertTableData(STsdb *pTsdb, int64_t version, SSubmitMsgIter *pMsgIter, SSubmitBlk *pBlock,
                            SSubmitBlkRsp *pRsp) {
  int32_t    code = 0;
  SMemTable *pMemTable = pTsdb->mem;
  STbData   *pTbData = NULL;
  tb_uid_t   suid = pMsgIter->suid;
  tb_uid_t   uid = pMsgIter->uid;
  int32_t    sverNew;

  // check if table exists (todo: refact)
  SMetaReader mr = {0};
  metaReaderInit(&mr, pTsdb->pVnode->pMeta, 0);
  if (metaGetTableEntryByUid(&mr, pMsgIter->uid) < 0) {
    metaReaderClear(&mr);
    code = TSDB_CODE_PAR_TABLE_NOT_EXIST;
    goto _err;
  }
  // NOTE(review): unbounded append; the capacity of tblFName is not visible
  // here -- confirm the caller sizes it for the existing prefix plus the name.
  if (pRsp->tblFName) strcat(pRsp->tblFName, mr.me.name);

  if (mr.me.type == TSDB_NORMAL_TABLE) {
    sverNew = mr.me.ntbEntry.schemaRow.version;
  } else {
    tDecoderClear(&mr.coder);

    // The schema lives on the super table. If that lookup fails, `mr.me` does
    // not hold a super-table entry and its schema version must not be used.
    if (metaGetTableEntryByUid(&mr, mr.me.ctbEntry.suid) < 0) {
      metaReaderClear(&mr);
      code = TSDB_CODE_PAR_TABLE_NOT_EXIST;
      goto _err;
    }
    sverNew = mr.me.stbEntry.schemaRow.version;
  }
  metaReaderClear(&mr);
  pRsp->sver = sverNew;

  // create/get STbData to op
  code = tsdbGetOrCreateTbData(pMemTable, suid, uid, &pTbData);
  if (code) {
    goto _err;
  }

  // do insert impl
  code = tsdbInsertTableDataImpl(pMemTable, pTbData, version, pMsgIter, pBlock, pRsp);
  if (code) {
    goto _err;
  }

  return code;

_err:
  return code;
}

/**
 * Record a range delete [sKey, eKey] for table (suid, uid) in the active
 * memtable of pTsdb.
 *
 * The delete marker is allocated from the vnode's in-use buffer pool and is
 * appended to the table's delete list. Memtable version bounds and the delete
 * counter are updated, and the last-row / last caches are invalidated when
 * the corresponding cache options are enabled.
 *
 * @param pTsdb    tsdb instance
 * @param version  version of the delete operation
 * @param suid     super-table uid of the target table
 * @param uid      uid of the target table
 * @param sKey     first timestamp covered by the delete
 * @param eKey     last timestamp covered by the delete
 * @return 0 on success, TSDB_CODE_OUT_OF_MEMORY (or the error from
 *         tsdbGetOrCreateTbData) on failure
 */
int32_t tsdbDeleteTableData(STsdb *pTsdb, int64_t version, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKEY eKey) {
  int32_t    code = 0;
  SMemTable *pMemTable = pTsdb->mem;
  STbData   *pTbData = NULL;
  SVBufPool *pPool = pTsdb->pVnode->inUse;

  // check if table exists (todo)

  code = tsdbGetOrCreateTbData(pMemTable, suid, uid, &pTbData);
  if (code) {
    goto _err;
  }

  // do delete
  SDelData *pDelData = (SDelData *)vnodeBufPoolMalloc(pPool, sizeof(*pDelData));
  if (pDelData == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  pDelData->version = version;
  pDelData->sKey = sKey;
  pDelData->eKey = eKey;
  pDelData->pNext = NULL;

  // append to the table's singly linked delete list
  if (pTbData->pHead == NULL) {
    ASSERT(pTbData->pTail == NULL);
    pTbData->pHead = pTbData->pTail = pDelData;
  } else {
    pTbData->pTail->pNext = pDelData;
    pTbData->pTail = pDelData;
  }

  // update the state of pMemTable and other (todo)
  pMemTable->minVersion = TMIN(pMemTable->minVersion, version);
  pMemTable->maxVersion = TMAX(pMemTable->maxVersion, version);
  pMemTable->nDel++;

  // pTbData->maxKey is a plain timestamp, not a TSDBKEY, so it must be
  // compared as a scalar: the cached last row is affected when the deleted
  // range reaches the largest key inserted so far.
  if (TSDB_CACHE_LAST_ROW(pMemTable->pTsdb->pVnode->config) && eKey >= pTbData->maxKey) {
    tsdbCacheDeleteLastrow(pTsdb->lruCache, pTbData->uid, eKey);
  }

  if (TSDB_CACHE_LAST(pMemTable->pTsdb->pVnode->config)) {
    tsdbCacheDeleteLast(pTsdb->lruCache, pTbData->uid, eKey);
  }

  // success is not an error: log it at debug level without a failure reason
  tsdbDebug("vgId:%d, delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64
            " at version %" PRId64,
            TD_VID(pTsdb->pVnode), suid, uid, sKey, eKey, version);
  return code;

_err:
  tsdbError("vgId:%d, failed to delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64
            " since %s",
            TD_VID(pTsdb->pVnode), suid, uid, sKey, eKey, tstrerror(code));
  return code;
}

/**
 * Heap-allocate a table-data iterator and open it on pTbData.
 *
 * @param pTbData   table data to iterate
 * @param pFrom     key to start from, or NULL to start at the list end
 * @param backward  non-zero to iterate towards smaller keys
 * @param ppIter    receives the new iterator (NULL on allocation failure)
 * @return 0 on success, TSDB_CODE_OUT_OF_MEMORY if allocation fails
 */
int32_t tsdbTbDataIterCreate(STbData *pTbData, TSDBKEY *pFrom, int8_t backward, STbDataIter **ppIter) {
  STbDataIter *pNewIter = (STbDataIter *)taosMemoryCalloc(1, sizeof(STbDataIter));

  *ppIter = pNewIter;
  if (pNewIter == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  tsdbTbDataIterOpen(pTbData, pFrom, backward, pNewIter);
  return 0;
}

/**
 * Free an iterator obtained from tsdbTbDataIterCreate(); NULL is accepted.
 *
 * @return always NULL, so callers can write `pIter = tsdbTbDataIterDestroy(pIter);`
 */
void *tsdbTbDataIterDestroy(STbDataIter *pIter) {
  if (pIter != NULL) taosMemoryFree(pIter);
  return NULL;
}

H
Hongze Cheng 已提交
234
/**
 * Initialize a caller-provided iterator over pTbData's skip list.
 *
 * With pFrom == NULL the iterator starts at the node next to the head
 * sentinel (forward) or the tail sentinel (backward). Otherwise the level-0
 * position for *pFrom is located with tbDataMovePosTo() and the iterator
 * starts at the node next to that position in the travel direction.
 *
 * @param pTbData   table data to iterate
 * @param pFrom     starting key, or NULL
 * @param backward  non-zero to iterate towards the head
 * @param pIter     iterator to initialize
 */
void tsdbTbDataIterOpen(STbData *pTbData, TSDBKEY *pFrom, int8_t backward, STbDataIter *pIter) {
  SMemSkipListNode *pos[SL_MAX_LEVEL];
  SMemSkipListNode *pAnchor;  // node whose level-0 neighbour becomes the first element

  pIter->pTbData = pTbData;
  pIter->backward = backward;
  pIter->pRow = NULL;
  pIter->row.type = 0;

  if (pFrom != NULL) {
    // start next to the position found for the given key
    tbDataMovePosTo(pTbData, pos, pFrom, backward ? SL_MOVE_BACKWARD : 0);
    pAnchor = pos[0];
  } else {
    // start from the sentinel at the far end of the travel direction
    pAnchor = backward ? pTbData->sl.pTail : pTbData->sl.pHead;
  }

  pIter->pNode = backward ? SL_NODE_BACKWARD(pAnchor, 0) : SL_NODE_FORWARD(pAnchor, 0);
}

/**
 * Advance the iterator by one node at level 0 in its travel direction.
 *
 * The cached row pointer is always cleared. When the iterator already sits on
 * the terminating sentinel (head when moving backward, tail when moving
 * forward) it is left where it is.
 *
 * @return true if the iterator now rests on a data node, false once the
 *         terminating sentinel is reached
 */
bool tsdbTbDataIterNext(STbDataIter *pIter) {
  SMemSkipListNode *pEnd;  // sentinel that terminates the current direction

  pIter->pRow = NULL;

  if (pIter->backward) {
    ASSERT(pIter->pNode != pIter->pTbData->sl.pTail);

    pEnd = pIter->pTbData->sl.pHead;
    if (pIter->pNode == pEnd) return false;

    pIter->pNode = SL_NODE_BACKWARD(pIter->pNode, 0);
  } else {
    ASSERT(pIter->pNode != pIter->pTbData->sl.pHead);

    pEnd = pIter->pTbData->sl.pTail;
    if (pIter->pNode == pEnd) return false;

    pIter->pNode = SL_NODE_FORWARD(pIter->pNode, 0);
  }

  return pIter->pNode != pEnd;
}

H
Hongze Cheng 已提交
296
/**
 * Return the row the iterator currently points at.
 *
 * The row is decoded from the node payload on first access and cached in the
 * iterator until the next call to tsdbTbDataIterNext().
 *
 * @param pIter  iterator; NULL is accepted (added for commit usage)
 * @return pointer to the iterator-owned row, or NULL when pIter is NULL or
 *         the iterator has reached the terminating sentinel
 */
TSDBROW *tsdbTbDataIterGet(STbDataIter *pIter) {
  if (pIter == NULL) return NULL;

  if (pIter->pRow == NULL) {
    SMemSkipListNode *pEnd = pIter->backward ? pIter->pTbData->sl.pHead : pIter->pTbData->sl.pTail;

    if (pIter->pNode != pEnd) {
      tGetTSDBRow((uint8_t *)SL_NODE_DATA(pIter->pNode), &pIter->row);
      pIter->pRow = &pIter->row;
    }
  }

  return pIter->pRow;
}
H
Hongze Cheng 已提交
320

H
Hongze Cheng 已提交
321 322 323 324 325 326 327 328 329 330 331
/**
 * Return the STbData of (suid, uid), creating and registering it if needed.
 *
 * A new entry is allocated from the vnode's in-use buffer pool together with
 * its two skip-list sentinel nodes (head and tail, both of the configured
 * maximum level) and is inserted into pMemTable->aTbData at its sorted
 * position under the memtable's write latch.
 *
 * @param pMemTable  memtable to search / extend
 * @param suid       super-table uid of the table
 * @param uid        uid of the table
 * @param ppTbData   receives the entry, or NULL on failure
 * @return 0 on success, TSDB_CODE_OUT_OF_MEMORY on allocation failure
 */
static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData) {
  int32_t  code = 0;
  int32_t  idx = 0;
  STbData *pTbData = NULL;
  STbData *pTbDataT = &(STbData){.suid = suid, .uid = uid};

  // get
  idx = taosArraySearchIdx(pMemTable->aTbData, &pTbDataT, tbDataPCmprFn, TD_GE);
  if (idx >= 0) {
    pTbData = (STbData *)taosArrayGetP(pMemTable->aTbData, idx);
    if (tbDataPCmprFn(&pTbDataT, &pTbData) == 0) goto _exit;
  }

  // create
  SVBufPool *pPool = pMemTable->pTsdb->pVnode->inUse;
  int8_t     maxLevel = pMemTable->pTsdb->pVnode->config.tsdbCfg.slLevel;

  // one allocation: STbData followed by the head and tail sentinel nodes
  pTbData = vnodeBufPoolMalloc(pPool, sizeof(*pTbData) + SL_NODE_SIZE(maxLevel) * 2);
  if (pTbData == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  pTbData->suid = suid;
  pTbData->uid = uid;
  pTbData->minKey = TSKEY_MAX;
  pTbData->maxKey = TSKEY_MIN;
  pTbData->minVersion = VERSION_MAX;
  pTbData->maxVersion = VERSION_MIN;
  pTbData->maxSkmVer = -1;
  pTbData->pHead = NULL;
  pTbData->pTail = NULL;
  pTbData->sl.seed = taosRand();
  pTbData->sl.size = 0;
  pTbData->sl.maxLevel = maxLevel;
  pTbData->sl.level = 0;
  pTbData->sl.pHead = (SMemSkipListNode *)&pTbData[1];
  pTbData->sl.pTail = (SMemSkipListNode *)POINTER_SHIFT(pTbData->sl.pHead, SL_NODE_SIZE(maxLevel));
  pTbData->sl.pHead->level = maxLevel;
  pTbData->sl.pTail->level = maxLevel;
  // empty list: head and tail point at each other on every level
  for (int8_t iLevel = 0; iLevel < maxLevel; iLevel++) {
    SL_NODE_FORWARD(pTbData->sl.pHead, iLevel) = pTbData->sl.pTail;
    SL_NODE_BACKWARD(pTbData->sl.pTail, iLevel) = pTbData->sl.pHead;

    SL_NODE_BACKWARD(pTbData->sl.pHead, iLevel) = NULL;
    SL_NODE_FORWARD(pTbData->sl.pTail, iLevel) = NULL;
  }

  // no entry sorts at or after the new one: append at the end
  void *p;
  if (idx < 0) {
    idx = taosArrayGetSize(pMemTable->aTbData);
  }

  taosWLockLatch(&pMemTable->latch);
  p = taosArrayInsert(pMemTable->aTbData, idx, &pTbData);
  taosWUnLockLatch(&pMemTable->latch);

  if (p == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }

  // log only after the entry has really been registered
  tsdbDebug("vgId:%d add table data %p at idx:%d", TD_VID(pMemTable->pTsdb->pVnode), pTbData, idx);

_exit:
  *ppTbData = pTbData;
  return code;

_err:
  *ppTbData = NULL;
  return code;
}

/**
 * Compute, for every level, the skip-list node next to which *pKey belongs.
 *
 * Forward search (default) starts at the head sentinel and, per level,
 * advances while the next node's key compares less than *pKey. Backward
 * search (SL_MOVE_BACKWARD) starts at the tail sentinel and steps back while
 * the previous node's key compares greater than *pKey. Levels at or above the
 * list's current level are set to the starting sentinel.
 *
 * With SL_MOVE_FROM_POS the walk resumes from the node already stored in
 * pos[level - 1] instead of the sentinel.
 *
 * @param pTbData  table data owning the skip list
 * @param pos      in/out array with at least sl.maxLevel entries
 * @param pKey     key to position against
 * @param flags    bitwise OR of SL_MOVE_BACKWARD / SL_MOVE_FROM_POS
 */
static void tbDataMovePosTo(STbData *pTbData, SMemSkipListNode **pos, TSDBKEY *pKey, int32_t flags) {
  const bool        isBackward = (flags & SL_MOVE_BACKWARD) != 0;
  const bool        resume = (flags & SL_MOVE_FROM_POS) != 0;
  SMemSkipListNode *pStart = isBackward ? pTbData->sl.pTail : pTbData->sl.pHead;
  SMemSkipListNode *pStop = isBackward ? pTbData->sl.pHead : pTbData->sl.pTail;
  SMemSkipListNode *pCur = pStart;

  // levels not in use yet hold nothing but the sentinels
  for (int8_t iLevel = pTbData->sl.maxLevel - 1; iLevel >= pTbData->sl.level; iLevel--) {
    pos[iLevel] = pStart;
  }

  if (pTbData->sl.level == 0) {
    return;
  }

  if (resume) {
    pCur = pos[pTbData->sl.level - 1];
  }

  // descend level by level, carrying the reached node down
  for (int8_t iLevel = pTbData->sl.level - 1; iLevel >= 0; iLevel--) {
    for (;;) {
      SMemSkipListNode *pNext = isBackward ? SL_NODE_BACKWARD(pCur, iLevel) : SL_NODE_FORWARD(pCur, iLevel);
      if (pNext == pStop) {
        break;
      }

      int c = tsdbKeyCmprFn((TSDBKEY *)SL_NODE_DATA(pNext), pKey);
      if (isBackward ? (c <= 0) : (c >= 0)) {
        break;
      }

      pCur = pNext;
    }

    pos[iLevel] = pCur;
  }
}
H
Hongze Cheng 已提交
456

H
Hongze Cheng 已提交
457 458 459 460
/**
 * Draw a random level for a new skip-list node.
 *
 * Starting from level 1, the level is raised while a draw from the list's
 * seeded generator is a multiple of 4, capped at min(maxLevel, level + 1) of
 * the list. The generator is consulted before the cap is checked, so one draw
 * is always consumed per loop test.
 */
static FORCE_INLINE int8_t tsdbMemSkipListRandLevel(SMemSkipList *pSl) {
  const uint32_t factor = 4;
  const int8_t   cap = TMIN(pSl->maxLevel, pSl->level + 1);
  int8_t         picked = 1;

  for (;;) {
    if ((taosRandR(&pSl->seed) % factor) != 0) break;
    if (picked >= cap) break;
    picked++;
  }

  return picked;
}
/**
 * Allocate a skip-list node for *pRow and link it into pTbData's list.
 *
 * The node (links plus encoded row) comes from the vnode's in-use buffer
 * pool. For each level of the new node it is linked next to pos[level]:
 * after it when `forward` is non-zero, before it otherwise.
 *
 * @param pMemTable  memtable owning the table data
 * @param pTbData    table data whose skip list receives the row
 * @param pos        per-level neighbours as computed by tbDataMovePosTo()
 * @param pRow       row to store
 * @param forward    non-zero: insert after pos[]; zero: insert before pos[]
 * @return 0 on success, TSDB_CODE_OUT_OF_MEMORY if the node cannot be allocated
 */
static int32_t tbDataDoPut(SMemTable *pMemTable, STbData *pTbData, SMemSkipListNode **pos, TSDBROW *pRow,
                           int8_t forward) {
  SVBufPool *pPool = pMemTable->pTsdb->pVnode->inUse;

  // build the node: link slots first, encoded row right behind them
  int8_t            nodeLevel = tsdbMemSkipListRandLevel(&pTbData->sl);
  SMemSkipListNode *pNew =
      (SMemSkipListNode *)vnodeBufPoolMalloc(pPool, SL_NODE_SIZE(nodeLevel) + tPutTSDBRow(NULL, pRow));
  if (pNew == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  pNew->level = nodeLevel;
  for (int8_t i = 0; i < nodeLevel; i++) {
    SL_NODE_FORWARD(pNew, i) = NULL;
    SL_NODE_BACKWARD(pNew, i) = NULL;
  }
  tPutTSDBRow((uint8_t *)SL_NODE_DATA(pNew), pRow);

  // link the node in on every level it participates in
  for (int8_t i = 0; i < pNew->level; i++) {
    SMemSkipListNode *pAt = pos[i];

    if (forward) {
      SMemSkipListNode *pAfter = SL_NODE_FORWARD(pAt, i);

      SL_NODE_FORWARD(pNew, i) = pAfter;
      SL_NODE_BACKWARD(pNew, i) = pAt;

      SL_NODE_BACKWARD(pAfter, i) = pNew;
      SL_NODE_FORWARD(pAt, i) = pNew;
    } else {
      SMemSkipListNode *pBefore = SL_NODE_BACKWARD(pAt, i);

      SL_NODE_FORWARD(pNew, i) = pAt;
      SL_NODE_BACKWARD(pNew, i) = pBefore;

      SL_NODE_FORWARD(pBefore, i) = pNew;
      SL_NODE_BACKWARD(pAt, i) = pNew;
    }
  }

  // bookkeeping: element count and highest level in use
  pTbData->sl.size++;
  if (pNew->level > pTbData->sl.level) {
    pTbData->sl.level = pNew->level;
  }

  return 0;
}

H
Hongze Cheng 已提交
522 523 524 525 526 527
/**
 * Insert all rows of one submit block into pTbData's skip list.
 *
 * The first row is positioned with a backward search from the tail; the
 * remaining rows are positioned with forward searches that resume from the
 * previous position. Afterwards the key/version bounds of the table data and
 * of the memtable, the row counter, the optional last-row / last caches and
 * the row counts in pRsp are updated.
 *
 * An empty block inserts nothing and reports zero rows in pRsp.
 *
 * @param pMemTable  memtable owning pTbData
 * @param pTbData    per-table data receiving the rows
 * @param version    version stamped on every row
 * @param pMsgIter   submit-message iterator (supplies the schema version)
 * @param pBlock     submit block to read rows from
 * @param pRsp       response whose numOfRows / affectedRows are set
 * @return 0 on success, otherwise the error from tbDataDoPut()
 */
static int32_t tsdbInsertTableDataImpl(SMemTable *pMemTable, STbData *pTbData, int64_t version,
                                       SSubmitMsgIter *pMsgIter, SSubmitBlk *pBlock, SSubmitBlkRsp *pRsp) {
  int32_t           code = 0;
  SSubmitBlkIter    blkIter = {0};
  TSDBKEY           key = {.version = version};
  SMemSkipListNode *pos[SL_MAX_LEVEL];
  TSDBROW           row = tsdbRowFromTSRow(version, NULL);
  int32_t           nRow = 0;
  STSRow           *pLastRow = NULL;

  tInitSubmitBlkIter(pMsgIter, pBlock, &blkIter);

  // backward put first data
  row.pTSRow = tGetSubmitBlkNext(&blkIter);
  if (row.pTSRow == NULL) {
    // empty block: nothing to insert, and nothing to dereference below
    pRsp->numOfRows = 0;
    pRsp->affectedRows = 0;
    return code;
  }
  key.ts = row.pTSRow->ts;
  nRow++;
  tbDataMovePosTo(pTbData, pos, &key, SL_MOVE_BACKWARD);
  code = tbDataDoPut(pMemTable, pTbData, pos, &row, 0);
  if (code) {
    goto _err;
  }

  pTbData->minKey = TMIN(pTbData->minKey, key.ts);

  pLastRow = row.pTSRow;

  // forward put rest data
  row.pTSRow = tGetSubmitBlkNext(&blkIter);
  if (row.pTSRow) {
    // turn the backward positions into forward positions: step back by one
    // node on every level before resuming forward searches from them
    for (int8_t iLevel = 0; iLevel < pTbData->sl.maxLevel; iLevel++) {
      pos[iLevel] = SL_NODE_BACKWARD(pos[iLevel], iLevel);
    }
    do {
      key.ts = row.pTSRow->ts;
      nRow++;
      tbDataMovePosTo(pTbData, pos, &key, SL_MOVE_FROM_POS);
      code = tbDataDoPut(pMemTable, pTbData, pos, &row, 1);
      if (code) {
        goto _err;
      }

      pLastRow = row.pTSRow;

      row.pTSRow = tGetSubmitBlkNext(&blkIter);
    } while (row.pTSRow);
  }

  // key.ts now holds the timestamp of the last row of the block
  if (key.ts >= pTbData->maxKey) {
    if (key.ts > pTbData->maxKey) {
      pTbData->maxKey = key.ts;
    }

    if (TSDB_CACHE_LAST_ROW(pMemTable->pTsdb->pVnode->config) && pLastRow != NULL) {
      tsdbCacheInsertLastrow(pMemTable->pTsdb->lruCache, pMemTable->pTsdb, pTbData->uid, pLastRow, true);
    }
  }

  if (TSDB_CACHE_LAST(pMemTable->pTsdb->pVnode->config)) {
    tsdbCacheInsertLast(pMemTable->pTsdb->lruCache, pTbData->uid, pLastRow, pMemTable->pTsdb);
  }

  pTbData->minVersion = TMIN(pTbData->minVersion, version);
  pTbData->maxVersion = TMAX(pTbData->maxVersion, version);
  pTbData->maxSkmVer = TMAX(pTbData->maxSkmVer, pMsgIter->sversion);

  // SMemTable
  pMemTable->minKey = TMIN(pMemTable->minKey, pTbData->minKey);
  pMemTable->maxKey = TMAX(pMemTable->maxKey, pTbData->maxKey);
  pMemTable->minVersion = TMIN(pMemTable->minVersion, pTbData->minVersion);
  pMemTable->maxVersion = TMAX(pMemTable->maxVersion, pTbData->maxVersion);
  pMemTable->nRow += nRow;

  pRsp->numOfRows = nRow;
  pRsp->affectedRows = nRow;

  return code;

_err:
  return code;
}
H
Hongze Cheng 已提交
602

603
/** Return the element count recorded for pTbData's skip list. */
int32_t tsdbGetNRowsInTbData(STbData *pTbData) {
  return pTbData->sl.size;
}
H
Hongze Cheng 已提交
604 605 606 607 608 609 610 611 612 613 614 615

/**
 * Atomically take one more reference on a memtable.
 * Asserts that the count was positive before the increment.
 */
void tsdbRefMemTable(SMemTable *pMemTable) {
  int32_t nRefBefore = atomic_fetch_add_32(&pMemTable->nRef, 1);
  ASSERT(nRefBefore > 0);
}

/**
 * Atomically drop one reference on a memtable and destroy it when the count
 * reaches zero.
 */
void tsdbUnrefMemTable(SMemTable *pMemTable) {
  if (atomic_sub_fetch_32(&pMemTable->nRef, 1) != 0) {
    return;
  }

  tsdbMemTableDestroy(pMemTable);
}