tstreamFileState.c 12.8 KB
Newer Older
5
54liuyao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "tstreamFileState.h"

dengyihao's avatar
dengyihao 已提交
18
#include "streamBackendRocksdb.h"
5
54liuyao 已提交
19
#include "taos.h"
dengyihao's avatar
dengyihao 已提交
20
#include "tcommon.h"
5
54liuyao 已提交
21 22 23
#include "thash.h"
#include "tsimplehash.h"

dengyihao's avatar
dengyihao 已提交
24
// Fraction of the currently allocated row buffers that flushRowBuff() tries to evict in one pass.
#define FLUSH_RATIO                    0.2
// Lower bound on the number of rows evicted per flush; also used (doubled) as the minimum row capacity.
#define FLUSH_NUM                      4
// Memory budget in bytes used by streamFileStateInit() when the caller passes memSize <= 0.
// No trailing semicolon, so the macro can be used inside expressions.
#define DEFAULT_MAX_STREAM_BUFFER_SIZE (128 * 1024 * 1024)

struct SStreamFileState {
  SList*     usedBuffs;
  SList*     freeBuffs;
  SSHashObj* rowBuffMap;
  void*      pFileStore;
  int32_t    rowSize;
  int32_t    keyLen;
  uint64_t   preCheckPointVersion;
  uint64_t   checkPointVersion;
  TSKEY      maxTs;
  TSKEY      deleteMark;
5
54liuyao 已提交
39
  TSKEY      flushMark;
5
54liuyao 已提交
40 41
  uint64_t   maxRowCount;
  uint64_t   curRowCount;
5
54liuyao 已提交
42
  GetTsFun   getTs;
5
54liuyao 已提交
43 44 45 46
};

// SRowBuffInfo is an alias of SRowBuffPos.
typedef SRowBuffPos SRowBuffInfo;

dengyihao's avatar
dengyihao 已提交
47 48
/*
 * Create a file-state cache.
 *
 * memSize  memory budget in bytes; values <= 0 select DEFAULT_MAX_STREAM_BUFFER_SIZE
 * keySize  size in bytes of a key
 * rowSize  size in bytes of a row buffer; must be non-zero
 * fp       callback that extracts the timestamp from a key
 * pFile    handle of the backing store, passed through to the rocksdb helpers
 * delMark  expiry distance; rows older than maxTs - delMark are dropped from memory
 *
 * Returns the new state, or NULL when rowSize is 0 or an allocation fails.
 */
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, GetTsFun fp, void* pFile,
                                      TSKEY delMark) {
  // Declared and initialised before the first goto so that the _error path never
  // hands an indeterminate pointer to streamFileStateDestroy().
  SStreamFileState* pFileState = NULL;

  if (memSize <= 0) {
    memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE;
  }
  if (rowSize == 0) {
    goto _error;
  }

  pFileState = taosMemoryCalloc(1, sizeof(SStreamFileState));
  if (!pFileState) {
    goto _error;
  }

  // Capacity in rows, never below twice the minimum flush size.
  pFileState->maxRowCount = TMAX((uint64_t)memSize / rowSize, FLUSH_NUM * 2);
  pFileState->usedBuffs = tdListNew(POINTER_BYTES);
  pFileState->freeBuffs = tdListNew(POINTER_BYTES);
  _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY);
  int32_t    cap = TMIN(10240, pFileState->maxRowCount);
  pFileState->rowBuffMap = tSimpleHashInit(cap, hashFn);
  if (!pFileState->usedBuffs || !pFileState->freeBuffs || !pFileState->rowBuffMap) {
    goto _error;
  }

  pFileState->keyLen = keySize;
  pFileState->rowSize = rowSize;
  pFileState->preCheckPointVersion = 0;
  pFileState->checkPointVersion = 1;
  pFileState->pFileStore = pFile;
  pFileState->getTs = fp;
  pFileState->curRowCount = 0;
  pFileState->deleteMark = delMark;
  pFileState->flushMark = -1;

  // The return value is not checked here; a failed recovery leaves the cache empty.
  recoverSnapshot(pFileState);
  return pFileState;

_error:
  streamFileStateDestroy(pFileState);
  return NULL;
}

/*
 * Release a row position together with the key and row buffer it owns.
 * pPos must not be NULL.
 */
void destroyRowBuffPos(SRowBuffPos* pPos) {
  taosMemoryFreeClear(pPos->pKey);
  taosMemoryFreeClear(pPos->pRowBuff);

  taosMemoryFree(pPos);
}

void destroyRowBuffPosPtr(void* ptr) {
  if (!ptr) {
    return;
  }
5
54liuyao 已提交
97
  SRowBuffPos* pPos = *(SRowBuffPos**)ptr;
L
liuyao 已提交
98 99 100
  if (!pPos->beUsed) {
    destroyRowBuffPos(pPos);
  }
5
54liuyao 已提交
101 102
}

5
54liuyao 已提交
103 104 105 106 107 108 109 110 111 112 113
void destroyRowBuff(void* ptr) {
  if (!ptr) {
    return;
  }
  taosMemoryFree(*(void**)ptr);
}

/*
 * Tear down a file state: both buffer lists, the lookup map and the state object itself.
 * A NULL argument is accepted and ignored.
 */
void streamFileStateDestroy(SStreamFileState* pFileState) {
  if (pFileState == NULL) {
    return;
  }

  // Positions still flagged beUsed are left alive by destroyRowBuffPosPtr.
  tdListFreeP(pFileState->usedBuffs, destroyRowBuffPosPtr);
  tdListFreeP(pFileState->freeBuffs, destroyRowBuff);
  tSimpleHashCleanup(pFileState->rowBuffMap);

  taosMemoryFree(pFileState);
}

5
54liuyao 已提交
120
/*
 * Walk usedBuffs and recycle entries.
 *
 * ts   entries whose key timestamp is below ts are recycled
 * all  when true every entry is recycled and the map is left untouched
 *      (the caller is expected to have cleared it)
 *
 * A recycled entry's row buffer moves to freeBuffs; the position and its list node are freed.
 */
void clearExpiredRowBuff(SStreamFileState* pFileState, TSKEY ts, bool all) {
  SListIter it = {0};
  tdListInitIter(pFileState->usedBuffs, &it, TD_LIST_FORWARD);

  for (SListNode* pCurNode = tdListNext(&it); pCurNode != NULL; pCurNode = tdListNext(&it)) {
    SRowBuffPos* pEntry = *(SRowBuffPos**)(pCurNode->data);

    // Keep the entry unless everything goes or its timestamp is older than ts.
    if (!all && !(pFileState->getTs(pEntry->pKey) < ts)) {
      continue;
    }

    ASSERT(pEntry->pRowBuff != NULL);
    // Hand the row buffer back for reuse and detach it so destroyRowBuffPos does not free it.
    tdListAppend(pFileState->freeBuffs, &(pEntry->pRowBuff));
    pEntry->pRowBuff = NULL;

    if (!all) {
      tSimpleHashRemove(pFileState->rowBuffMap, pEntry->pKey, pFileState->keyLen);
    }
    destroyRowBuffPos(pEntry);

    tdListPopNode(pFileState->usedBuffs, pCurNode);
    taosMemoryFreeClear(pCurNode);
  }
}

5
54liuyao 已提交
141 142 143 144 145
/*
 * Drop every cached row: empty the lookup map, then recycle all used buffers.
 */
void streamFileStateClear(SStreamFileState* pFileState) {
  tSimpleHashClear(pFileState->rowBuffMap);

  const bool recycleAll = true;
  clearExpiredRowBuff(pFileState, 0, recycleAll);
}

L
liuyao 已提交
146
/*
 * Move up to max positions from usedBuffs into pFlushList.
 *
 * Only positions whose beUsed flag equals `used` are taken. Each one is removed from the
 * lookup map, and flushMark is raised to its key timestamp if that is larger.
 */
void popUsedBuffs(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uint64_t max, bool used) {
  uint64_t  moved = 0;
  SListIter it = {0};
  tdListInitIter(pFileState->usedBuffs, &it, TD_LIST_FORWARD);

  SListNode* pCurNode = NULL;
  while ((pCurNode = tdListNext(&it)) != NULL && moved < max) {
    SRowBuffPos* pEntry = *(SRowBuffPos**)pCurNode->data;
    if (pEntry->beUsed != used) {
      continue;
    }

    tdListAppend(pFlushList, &pEntry);
    pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pEntry->pKey));
    tSimpleHashRemove(pFileState->rowBuffMap, pEntry->pKey, pFileState->keyLen);

    tdListPopNode(pFileState->usedBuffs, pCurNode);
    taosMemoryFreeClear(pCurNode);
    moved++;
  }

  qInfo("do stream state flush %d rows to disck. is used: %d", listNEles(pFlushList), used);
}

/*
 * Evict a share of the cached rows to the backing store to free row buffers.
 *
 * Rows not marked in use are preferred; if none qualify, in-use rows are taken instead.
 * After the write, each evicted row buffer goes to freeBuffs. Positions that are not in use
 * are destroyed; in-use positions survive without a row buffer.
 *
 * Returns TSDB_CODE_OUT_OF_MEMORY if the temporary list cannot be created,
 * TSDB_CODE_SUCCESS otherwise (the result of flushSnapshot is not propagated).
 */
int32_t flushRowBuff(SStreamFileState* pFileState) {
  SStreamSnapshot* pToFlush = tdListNew(POINTER_BYTES);
  if (pToFlush == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  uint64_t quota = (uint64_t)(pFileState->curRowCount * FLUSH_RATIO);
  quota = TMAX(quota, FLUSH_NUM);

  popUsedBuffs(pFileState, pToFlush, quota, false);
  if (isListEmpty(pToFlush)) {
    popUsedBuffs(pFileState, pToFlush, quota, true);
  }

  flushSnapshot(pFileState, pToFlush, false);

  SListIter it = {0};
  tdListInitIter(pToFlush, &it, TD_LIST_FORWARD);
  for (SListNode* pItem = tdListNext(&it); pItem != NULL; pItem = tdListNext(&it)) {
    SRowBuffPos* pEntry = *(SRowBuffPos**)pItem->data;
    ASSERT(pEntry->pRowBuff != NULL);
    tdListAppend(pFileState->freeBuffs, &pEntry->pRowBuff);
    pEntry->pRowBuff = NULL;
  }

  tdListFreeP(pToFlush, destroyRowBuffPosPtr);
  return TSDB_CODE_SUCCESS;
}

/*
 * Make row buffers available: first recycle expired rows, and if that yields nothing
 * fall back to flushing rows to the backing store.
 *
 * Returns the result of flushRowBuff() when a flush was needed, TSDB_CODE_SUCCESS otherwise.
 */
int32_t clearRowBuff(SStreamFileState* pFileState) {
  TSKEY expireBefore = pFileState->maxTs - pFileState->deleteMark;
  clearExpiredRowBuff(pFileState, expireBefore, false);

  if (!isListEmpty(pFileState->freeBuffs)) {
    return TSDB_CODE_SUCCESS;
  }
  return flushRowBuff(pFileState);
}

5
54liuyao 已提交
199
/*
 * Take one recycled buffer from the head of `lists`.
 *
 * The buffer is zero-filled over buffSize bytes before it is returned.
 * Returns NULL when the list is empty.
 */
void* getFreeBuff(SList* lists, int32_t buffSize) {
  SListNode* pHead = tdListPopHead(lists);
  if (pHead == NULL) {
    return NULL;
  }

  void* pRecycled = *(void**)pHead->data;
  taosMemoryFree(pHead);

  memset(pRecycled, 0, buffSize);
  return pRecycled;
}

/*
 * Allocate a new row position with a zeroed key and a row buffer, and append it to usedBuffs.
 *
 * The row buffer is obtained, in order of preference, from:
 *   1. the free list,
 *   2. a fresh allocation while curRowCount is below maxRowCount,
 *   3. the free list again after clearRowBuff() has reclaimed space.
 */
SRowBuffPos* getNewRowPos(SStreamFileState* pFileState) {
  SRowBuffPos* pFresh = taosMemoryCalloc(1, sizeof(SRowBuffPos));
  pFresh->pKey = taosMemoryCalloc(1, pFileState->keyLen);

  void* pRow = getFreeBuff(pFileState->freeBuffs, pFileState->rowSize);

  if (pRow == NULL && pFileState->curRowCount < pFileState->maxRowCount) {
    pRow = taosMemoryCalloc(1, pFileState->rowSize);
    if (pRow != NULL) {
      pFileState->curRowCount++;
    }
  }

  if (pRow == NULL) {
    // Nothing free and no room (or allocation failed): reclaim and retry.
    int32_t code = clearRowBuff(pFileState);
    ASSERT(code == 0);
    pRow = getFreeBuff(pFileState->freeBuffs, pFileState->rowSize);
  }

  pFresh->pRowBuff = pRow;
  tdListAppend(pFileState->usedBuffs, &pFresh);
  ASSERT(pFresh->pRowBuff != NULL);
  return pFresh;
}

/*
 * Look up (or create) the row position for pKey.
 *
 * On a cache hit the existing position is returned through pVal and marked in use.
 * On a miss a new position is created; if the key's timestamp is not expired and lies below
 * flushMark, the row is looked up in the backing store and copied in when found.
 * The new position is registered in the lookup map.
 *
 * pVal   receives the SRowBuffPos*; on the miss path it may be NULL, in which case nothing is
 *        written to pVal or pVLen
 * pVLen  receives rowSize
 *
 * Always returns TSDB_CODE_SUCCESS. Also advances maxTs to the key's timestamp when larger.
 */
int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, void** pVal, int32_t* pVLen) {
  pFileState->maxTs = TMAX(pFileState->maxTs, pFileState->getTs(pKey));

  SRowBuffPos** ppCached = tSimpleHashGet(pFileState->rowBuffMap, pKey, keyLen);
  if (ppCached != NULL) {
    *pVLen = pFileState->rowSize;
    *pVal = *ppCached;
    (*ppCached)->beUsed = true;
    return TSDB_CODE_SUCCESS;
  }

  SRowBuffPos* pFresh = getNewRowPos(pFileState);
  pFresh->beUsed = true;
  ASSERT(pFresh->pRowBuff);
  memcpy(pFresh->pKey, pKey, keyLen);

  TSKEY keyTs = pFileState->getTs(pKey);
  bool  notExpired = keyTs > pFileState->maxTs - pFileState->deleteMark;
  bool  maybeOnDisk = keyTs < pFileState->flushMark;
  if (notExpired && maybeOnDisk) {
    int32_t storedLen = 0;
    void*   pStored = NULL;
    int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &pStored, &storedLen);
    if (code == TSDB_CODE_SUCCESS) {
      memcpy(pFresh->pRowBuff, pStored, storedLen);
    }
    taosMemoryFree(pStored);
  }

  tSimpleHashPut(pFileState->rowBuffMap, pKey, keyLen, &pFresh, POINTER_BYTES);
  if (pVal != NULL) {
    *pVLen = pFileState->rowSize;
    *pVal = pFresh;
  }
  return TSDB_CODE_SUCCESS;
}

5
54liuyao 已提交
271 272 273 274 275 276 277
/*
 * Remove a key from both the in-memory map and the backing store.
 *
 * Returns TSDB_CODE_SUCCESS when the in-memory removal succeeded; otherwise returns the
 * result of the backing-store deletion.
 */
int32_t deleteRowBuff(SStreamFileState* pFileState, const void* pKey, int32_t keyLen) {
  int32_t memCode = tSimpleHashRemove(pFileState->rowBuffMap, pKey, keyLen);
  int32_t storeCode = streamStateDel_rocksdb(pFileState->pFileStore, pKey);

  if (memCode == TSDB_CODE_SUCCESS) {
    return memCode;
  }
  return storeCode;
}

/*
 * Return the row buffer belonging to pPos through pVal.
 *
 * If the position still has its buffer it is returned directly. Otherwise a buffer is taken
 * from the free list (reclaiming space first if necessary), filled from the backing store,
 * and the position is put back at the front of usedBuffs.
 *
 * Always returns TSDB_CODE_SUCCESS. When the backing store has no value for the key the
 * buffer is returned zero-filled.
 */
int32_t getRowBuffByPos(SStreamFileState* pFileState, SRowBuffPos* pPos, void** pVal) {
  if (pPos->pRowBuff) {
    (*pVal) = pPos->pRowBuff;
    return TSDB_CODE_SUCCESS;
  }

  pPos->pRowBuff = getFreeBuff(pFileState->freeBuffs, pFileState->rowSize);
  if (!pPos->pRowBuff) {
    int32_t code = clearRowBuff(pFileState);
    ASSERT(code == 0);
    pPos->pRowBuff = getFreeBuff(pFileState->freeBuffs, pFileState->rowSize);
    ASSERT(pPos->pRowBuff);
  }

  int32_t len = 0;
  void*   pBuff = NULL;
  int32_t getCode = streamStateGet_rocksdb(pFileState->pFileStore, pPos->pKey, &pBuff, &len);
  // Copy only when the lookup actually produced data; memcpy from a NULL source is undefined
  // even for a zero length. This mirrors the check done in getRowBuff().
  if (getCode == TSDB_CODE_SUCCESS && pBuff != NULL) {
    memcpy(pPos->pRowBuff, pBuff, len);
  }
  taosMemoryFree(pBuff);

  (*pVal) = pPos->pRowBuff;
  tdListPrepend(pFileState->usedBuffs, &pPos);
  return TSDB_CODE_SUCCESS;
}

/*
 * Report whether pKey currently has an entry in the in-memory lookup map.
 */
bool hasRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen) {
  SRowBuffPos** ppFound = tSimpleHashGet(pFileState->rowBuffMap, pKey, keyLen);
  return ppFound != NULL;
}

dengyihao's avatar
dengyihao 已提交
309
/*
 * Mark a row position as no longer in use, which makes it a preferred candidate for
 * eviction in flushRowBuff().
 */
void releaseRowBuffPos(SRowBuffPos* pBuff) {
  pBuff->beUsed = false;
}
5
54liuyao 已提交
310 311

/*
 * Drop expired rows, then return the list of used row positions.
 * The returned list is the state's own usedBuffs list, not a copy.
 */
SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) {
  TSKEY expireBefore = pFileState->maxTs - pFileState->deleteMark;
  clearExpiredRowBuff(pFileState, expireBefore, false);

  SStreamSnapshot* pLive = pFileState->usedBuffs;
  return pLive;
}

L
liuyao 已提交
316 317 318 319 320 321
/*
 * Restore persisted state from pBuff: a single fixed-width 64-bit flushMark.
 * len is not consulted.
 */
void streamFileStateDecode(SStreamFileState* pFileState, void* pBuff, int32_t len) {
  // The advanced read cursor returned by the decoder is not needed afterwards.
  (void)taosDecodeFixedI64(pBuff, &pFileState->flushMark);
}

/*
 * Serialise the persistent part of the state (flushMark) into a newly allocated buffer.
 *
 * pVal  receives the buffer; the caller frees it
 * pLen  receives the buffer length, sizeof(TSKEY)
 *
 * NOTE(review): the allocation result is not checked before encoding into it.
 */
void streamFileStateEncode(SStreamFileState* pFileState, void** pVal, int32_t* pLen) {
  *pLen = sizeof(TSKEY);
  void* pOut = taosMemoryCalloc(1, *pLen);
  (*pVal) = pOut;

  // The encoder advances its cursor argument; pOut is a scratch copy so *pVal keeps the start.
  taosEncodeFixedI64(&pOut, pFileState->flushMark);
}

/*
 * Write every row of pSnapshot to the backing store using write batches.
 *
 * pSnapshot   list of SRowBuffPos*; every position must have a row buffer
 * flushState  when true, the encoded file state (flushMark) is also stored under the
 *             reserved key {ts = -1, groupId = 0}
 *
 * Returns TSDB_CODE_SUCCESS, or the first error reported while batching or writing rows.
 */
int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) {
  int32_t   code = TSDB_CODE_SUCCESS;
  SListIter iter = {0};
  tdListInitIter(pSnapshot, &iter, TD_LIST_FORWARD);

  const int32_t BATCH_LIMIT = 128;
  SListNode*    pNode = NULL;

  void* batch = streamStateCreateBatch();
  while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) {
    SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data;
    ASSERT(pPos->pRowBuff && pFileState->rowSize > 0);

    // Write out a full batch before adding more rows to it.
    if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) {
      code = streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
      streamStateClearBatch(batch);
      if (code != TSDB_CODE_SUCCESS) {
        // Stop here so the failure is not overwritten by the next batch append.
        break;
      }
    }

    SStateKey sKey = {.key = *((SWinKey*)pPos->pKey), .opNum = ((SStreamState*)pFileState->pFileStore)->number};
    code = streamStatePutBatch(pFileState->pFileStore, "state", batch, &sKey, pPos->pRowBuff, pFileState->rowSize);
  }

  // Write whatever is still pending, but keep the first error if one already occurred.
  if (streamStateGetBatchSize(batch) > 0) {
    int32_t tailCode = streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
    if (code == TSDB_CODE_SUCCESS) {
      code = tailCode;
    }
  }

  if (flushState) {
    int32_t len = 0;
    void*   buff = NULL;
    streamFileStateEncode(pFileState, &buff, &len);
    SWinKey key = {.ts = -1, .groupId = 0};  // reserved key for the file-state record
    streamStatePut_rocksdb(pFileState->pFileStore, &key, buff, len);
    taosMemoryFree(buff);
  }

  streamStateDestroyBatch(batch);
  return code;
}

/*
 * Reload cached rows from the backing store.
 *
 * First restores flushMark from the reserved key {ts = -1, groupId = 0}. Then walks the
 * store backwards from its last entry, loading rows into memory until the row capacity is
 * reached, the cursor is exhausted, or a row older than flushMark is encountered.
 *
 * Returns TSDB_CODE_FAILED when the state record or a cursor is unavailable, the seek error
 * when positioning fails, and TSDB_CODE_SUCCESS otherwise.
 *
 * NOTE(review): pCur is not released anywhere in this function — confirm whether the
 * cursor needs an explicit free.
 */
int32_t recoverSnapshot(SStreamFileState* pFileState) {
  int32_t code = TSDB_CODE_SUCCESS;
  SWinKey stkey = {.ts = -1, .groupId = 0};  // reserved key for the file-state record
  void*   pStVal = NULL;
  int32_t len = 0;
  code = streamStateGet_rocksdb(pFileState->pFileStore, &stkey, &pStVal, &len);
  if (code != TSDB_CODE_SUCCESS) {
    taosMemoryFree(pStVal);
    return TSDB_CODE_FAILED;
  }
  streamFileStateDecode(pFileState, pStVal, len);
  // The fetched value is owned by the caller (getRowBuff frees it the same way).
  taosMemoryFree(pStVal);

  SWinKey          key = {.groupId = 0, .ts = 0};
  SStreamStateCur* pCur = streamStateGetCur_rocksdb(pFileState->pFileStore, &key);
  if (!pCur) {
    return TSDB_CODE_FAILED;
  }
  code = streamStateSeekLast(pFileState->pFileStore, pCur);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  while (code == TSDB_CODE_SUCCESS) {
    if (pFileState->curRowCount == pFileState->maxRowCount) {
      break;
    }
    void*        pVal = NULL;
    int32_t      pVLen = 0;
    SRowBuffPos* pNewPos = getNewRowPos(pFileState);
    code = streamStateGetKVByCur_rocksdb(pCur, pNewPos->pKey, (const void**)&pVal, &pVLen);
    if (code != TSDB_CODE_SUCCESS || pFileState->getTs(pNewPos->pKey) < pFileState->flushMark) {
      // getNewRowPos() appended pNewPos at the tail of usedBuffs; unlink it before
      // destroying it so the list does not keep a dangling pointer.
      destroyRowBuffPos(pNewPos);
      SListNode* pTail = tdListPopTail(pFileState->usedBuffs);
      taosMemoryFreeClear(pTail);
      taosMemoryFree(pVal);
      break;
    }
    memcpy(pNewPos->pRowBuff, pVal, pVLen);
    taosMemoryFree(pVal);

    // The map is keyed by keyLen bytes everywhere else; pKey is only keyLen bytes long.
    code = tSimpleHashPut(pFileState->rowBuffMap, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES);
    if (code != TSDB_CODE_SUCCESS) {
      destroyRowBuffPos(pNewPos);
      SListNode* pTail = tdListPopTail(pFileState->usedBuffs);
      taosMemoryFreeClear(pTail);
      break;
    }
    code = streamStateCurPrev_rocksdb(pFileState->pFileStore, pCur);
  }

  return TSDB_CODE_SUCCESS;
}