tsdbMergeTree.c 28.0 KB
Newer Older
H
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "tsdb.h"
17
#include "tsdbFSet2.h"
18
#include "tsdbMerge.h"
H
Haojun Liao 已提交
19
#include "tsdbReadUtil.h"
20
#include "tsdbSttFileRW.h"
H
Hongze Cheng 已提交
21

H
Haojun Liao 已提交
22 23
static void tLDataIterClose2(SLDataIter *pIter);

24
// SLDataIter =================================================
25 26
/*
 * Allocate and initialize an array of `numOfSttTrigger` SSttBlockLoadInfo entries.
 *
 * Each entry gets both block-cache slots marked empty (index -1), its two
 * SBlockData buffers created, and an SSttBlk array allocated. `pSchema` and
 * `colList` are stored by pointer; they are not copied.
 *
 * Returns the array, or NULL with terrno set if any allocation fails. On
 * failure every entry set up so far is torn down again, so the caller never
 * receives a half-initialized object.
 */
SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols,
                                            int32_t numOfSttTrigger) {
  SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(numOfSttTrigger, sizeof(SSttBlockLoadInfo));
  if (pLoadInfo == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < numOfSttTrigger; ++i) {
    SSttBlockLoadInfo *pInfo = &pLoadInfo[i];
    int32_t            code = 0;

    pInfo->blockIndex[0] = -1;
    pInfo->blockIndex[1] = -1;
    pInfo->currentLoadBlockIndex = 1;
    pInfo->pSchema = pSchema;
    pInfo->colIds = colList;
    pInfo->numOfCols = numOfCols;

    // Attempt all three allocations before deciding, so the teardown below always
    // runs on an entry in the same state the regular destroy path would see.
    int32_t ret = tBlockDataCreate(&pInfo->blockData[0]);
    if (ret) {
      code = ret;
    }

    ret = tBlockDataCreate(&pInfo->blockData[1]);
    if (ret) {
      code = ret;
    }

    pInfo->aSttBlk = taosArrayInit(4, sizeof(SSttBlk));
    if (pInfo->aSttBlk == NULL && code == 0) {
      code = TSDB_CODE_OUT_OF_MEMORY;
    }

    if (code != 0) {
      // release entries [0, i]; entries after i were never touched
      for (int32_t k = 0; k <= i; ++k) {
        tBlockDataDestroy(&pLoadInfo[k].blockData[0]);
        tBlockDataDestroy(&pLoadInfo[k].blockData[1]);
        taosArrayDestroy(pLoadInfo[k].aSttBlk);
      }

      taosMemoryFree(pLoadInfo);
      terrno = code;
      return NULL;
    }
  }

  return pLoadInfo;
}

57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
/*
 * Allocate and initialize a single SSttBlockLoadInfo.
 *
 * Both block-cache slots start out empty (index -1), the two SBlockData
 * buffers are created and an SSttBlk array is allocated. `pSchema` and
 * `colList` are stored by pointer; they are not copied.
 *
 * Returns the new object, or NULL with terrno set if any allocation fails.
 * A partially built object is released before returning NULL.
 */
SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols) {
  SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(1, sizeof(SSttBlockLoadInfo));
  if (pLoadInfo == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pLoadInfo->blockIndex[0] = -1;
  pLoadInfo->blockIndex[1] = -1;
  pLoadInfo->currentLoadBlockIndex = 1;
  pLoadInfo->pSchema = pSchema;
  pLoadInfo->colIds = colList;
  pLoadInfo->numOfCols = numOfCols;

  // Attempt all three allocations before deciding, so the teardown below runs on
  // an object in the same state the regular destroy path would see.
  int32_t code = 0;
  int32_t ret = tBlockDataCreate(&pLoadInfo->blockData[0]);
  if (ret) {
    code = ret;
  }

  ret = tBlockDataCreate(&pLoadInfo->blockData[1]);
  if (ret) {
    code = ret;
  }

  pLoadInfo->aSttBlk = taosArrayInit(4, sizeof(SSttBlk));
  if (pLoadInfo->aSttBlk == NULL && code == 0) {
    code = TSDB_CODE_OUT_OF_MEMORY;
  }

  if (code != 0) {
    tBlockDataDestroy(&pLoadInfo->blockData[0]);
    tBlockDataDestroy(&pLoadInfo->blockData[1]);
    taosArrayDestroy(pLoadInfo->aSttBlk);
    taosMemoryFree(pLoadInfo);
    terrno = code;
    return NULL;
  }

  return pLoadInfo;
}

H
Hongze Cheng 已提交
86
/*
 * Put a load-info object back into its "nothing cached" state: both block
 * slots are invalidated, the stt block list is emptied and flagged as not
 * loaded, and the load statistics are zeroed. Only the first entry that
 * pLoadInfo points to is touched.
 */
void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) {
  // invalidate both cached block slots
  pLoadInfo->blockIndex[0] = -1;
  pLoadInfo->blockIndex[1] = -1;
  pLoadInfo->currentLoadBlockIndex = 1;

  // drop the stt block list and mark it as not loaded
  taosArrayClear(pLoadInfo->aSttBlk);
  pLoadInfo->sttBlockLoaded = false;

  // restart the statistics
  pLoadInfo->loadBlocks = 0;
  pLoadInfo->elapsedTime = 0;
}

H
Hongze Cheng 已提交
100
/*
 * Accumulate the load statistics of pLoadInfo into the caller's counters.
 * `*blocks` and `*el` are added to, not overwritten, so the caller must
 * initialize them. Only the first entry that pLoadInfo points to is read.
 */
void getLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, int64_t *blocks, double *el) {
  *el = *el + pLoadInfo->elapsedTime;
  *blocks = *blocks + pLoadInfo->loadBlocks;
}

107 108
static void freeTombBlock(void *param) {
  STombBlock **pTombBlock = (STombBlock **)param;
H
Haojun Liao 已提交
109 110 111 112
  tTombBlockDestroy(*pTombBlock);
  taosMemoryFree(*pTombBlock);
}

H
Hongze Cheng 已提交
113
/*
 * Release a load-info object: both block-data buffers, the stt block list and
 * the object itself. Only the first entry that pLoadInfo points to is torn
 * down. Accepts NULL.
 *
 * Always returns NULL so callers can write `p = destroyLastBlockLoadInfo(p);`.
 */
void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) {
  if (pLoadInfo != NULL) {
    pLoadInfo->currentLoadBlockIndex = 1;
    pLoadInfo->blockIndex[0] = -1;
    pLoadInfo->blockIndex[1] = -1;

    tBlockDataDestroy(&pLoadInfo->blockData[0]);
    tBlockDataDestroy(&pLoadInfo->blockData[1]);
    taosArrayDestroy(pLoadInfo->aSttBlk);

    taosMemoryFree(pLoadInfo);
  }

  return NULL;
}

133
/*
 * Destroy one stt data iterator: close its file reader, release its block
 * load info and free the iterator itself.
 *
 * NULL is accepted and ignored. Iterator lists may contain NULL entries
 * because the allocation that fills them is not always checked.
 */
static void destroyLDataIter(SLDataIter *pIter) {
  if (pIter == NULL) {
    return;
  }

  tLDataIterClose2(pIter);
  destroyLastBlockLoadInfo(pIter->pBlockLoadInfo);
  taosMemoryFree(pIter);
}

139
/*
 * Destroy the two-level iterator container (an array of per-level lists of
 * `SLDataIter *`) and everything it owns.
 *
 * Before each iterator is destroyed, its load statistics are added to
 * `*blocks` and `*el`. Iterators that were allocated but never opened have no
 * load info attached and contribute nothing to the totals.
 *
 * Accepts a NULL container. Always returns NULL.
 */
void *destroySttBlockReader(SArray *pLDataIterArray, int64_t *blocks, double *el) {
  if (pLDataIterArray == NULL) {
    return NULL;
  }

  int32_t numOfLevel = taosArrayGetSize(pLDataIterArray);
  for (int32_t i = 0; i < numOfLevel; ++i) {
    SArray *pList = taosArrayGetP(pLDataIterArray, i);
    int32_t numOfIter = taosArrayGetSize(pList);

    for (int32_t j = 0; j < numOfIter; ++j) {
      SLDataIter *pIter = taosArrayGetP(pList, j);
      if (pIter == NULL) {
        continue;
      }

      // the load info is only attached once the iterator has been opened
      if (pIter->pBlockLoadInfo != NULL) {
        *el += pIter->pBlockLoadInfo->elapsedTime;
        *blocks += pIter->pBlockLoadInfo->loadBlocks;
      }

      destroyLDataIter(pIter);
    }

    taosArrayDestroy(pList);
  }

  taosArrayDestroy(pLDataIterArray);
  return NULL;
}

H
Hongze Cheng 已提交
160
/*
 * Return the block data for the stt block the iterator currently points at
 * (pIter->iSttBlk), reading it from the stt file if it is not cached.
 *
 * The load info keeps two block slots. If either slot already holds the wanted
 * block it is returned directly and becomes the current slot. Otherwise the
 * other slot is selected, the block is read into it, the load statistics are
 * updated and pIter->iRow is reset to one position before the first row
 * (forward scan) or one past the last row (backward scan).
 *
 * Returns NULL when the iterator has no current block or no schema is set
 * (terrno untouched), or when reading fails (terrno set to the error code).
 */
static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) {
  SSttBlockLoadInfo *pInfo = pIter->pBlockLoadInfo;

  // cache hit in slot 0
  if (pIter->iSttBlk == pInfo->blockIndex[0]) {
    if (pInfo->currentLoadBlockIndex != 0) {
      tsdbDebug("current load index is set to 0, block index:%d, file index:%d, due to uid:%" PRIu64 ", load data, %s",
                pIter->iSttBlk, pIter->iStt, pIter->uid, idStr);
      pInfo->currentLoadBlockIndex = 0;
    }
    return &pInfo->blockData[0];
  }

  // cache hit in slot 1
  if (pIter->iSttBlk == pInfo->blockIndex[1]) {
    if (pInfo->currentLoadBlockIndex != 1) {
      tsdbDebug("current load index is set to 1, block index:%d, file index:%d, due to uid:%" PRIu64 ", load data, %s",
                pIter->iSttBlk, pIter->iStt, pIter->uid, idStr);
      pInfo->currentLoadBlockIndex = 1;
    }
    return &pInfo->blockData[1];
  }

  // nothing to read from
  if (pInfo->pSchema == NULL || pIter->pSttBlk == NULL) {
    return NULL;
  }

  // cache miss: switch to the other slot and read the block into it
  pInfo->currentLoadBlockIndex ^= 1;
  int64_t startUs = taosGetTimestampUs();

  SBlockData *pBlock = &pInfo->blockData[pInfo->currentLoadBlockIndex];

  // the first entry of colIds is not passed to the reader
  int32_t code = tsdbSttFileReadBlockDataByColumn(pIter->pReader, pIter->pSttBlk, pBlock, pInfo->pSchema,
                                                  &pInfo->colIds[1], pInfo->numOfCols - 1);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    return NULL;
  }

  double el = (taosGetTimestampUs() - startUs) / 1000.0;
  pInfo->elapsedTime += el;
  pInfo->loadBlocks += 1;

  tsdbDebug("read last block, total load:%d, trigger by uid:%" PRIu64
            ", last file index:%d, last block index:%d, entry:%d, rows:%d, %p, elapsed time:%.2f ms, %s",
            pInfo->loadBlocks, pIter->uid, pIter->iStt, pIter->iSttBlk, pInfo->currentLoadBlockIndex, pBlock->nRow,
            pBlock, el, idStr);

  // remember which block the slot holds and reset the row cursor
  pInfo->blockIndex[pInfo->currentLoadBlockIndex] = pIter->iSttBlk;
  pIter->iRow = (pIter->backward) ? pBlock->nRow : -1;

  tsdbDebug("last block index list:%d, %d, rowIndex:%d %s", pInfo->blockIndex[0], pInfo->blockIndex[1], pIter->iRow,
            idStr);
  return pBlock;
}

221
// find the earliest block that contains the required records
H
Hongze Cheng 已提交
222 223
/*
 * Starting at `index`, walk through pBlockList while the visited block's
 * [minUid, maxUid] range contains `uid`. The walk goes towards higher indexes
 * when `backward` is non-zero and towards lower indexes otherwise.
 *
 * Returns the last visited index that still contained `uid`. If `index` itself
 * is out of range or does not contain `uid`, the result is one step behind
 * `index`.
 */
static FORCE_INLINE int32_t findEarliestIndex(int32_t index, uint64_t uid, const SSttBlk *pBlockList, int32_t num,
                                              int32_t backward) {
  const int32_t dir = backward ? 1 : -1;
  int32_t       pos = index;

  for (; pos >= 0 && pos < num; pos += dir) {
    const SSttBlk *pBlk = &pBlockList[pos];
    if (uid < pBlk->minUid || uid > pBlk->maxUid) {
      break;
    }
  }

  return pos - dir;
}

H
Hongze Cheng 已提交
232
/*
 * Binary-search pBlockList (num entries, assumed ordered by uid range) for a
 * block whose [minUid, maxUid] range contains `uid`, then let
 * findEarliestIndex() move to the boundary block of the matching run for the
 * given scan direction.
 *
 * Returns the block index, or -1 when the list is empty or no block can
 * contain `uid`.
 */
static int32_t binarySearchForStartBlock(SSttBlk *pBlockList, int32_t num, uint64_t uid, int32_t backward) {
  if (num <= 0) {
    return -1;
  }

  int32_t lo = 0;
  int32_t hi = num - 1;

  // uid lies outside the range covered by the whole list
  if (uid < pBlockList[lo].minUid || uid > pBlockList[hi].maxUid) {
    return -1;
  }

  for (;;) {
    if (pBlockList[lo].minUid <= uid && uid <= pBlockList[lo].maxUid) {
      return findEarliestIndex(lo, uid, pBlockList, num, backward);
    }

    // uid falls into a gap outside the remaining window
    if (uid < pBlockList[lo].minUid || uid > pBlockList[hi].maxUid) {
      return -1;
    }

    const int32_t span = hi - lo + 1;
    const int32_t mid = lo + (span >> 1u);

    if (uid < pBlockList[mid].minUid) {
      hi = mid - 1;
    } else if (uid > pBlockList[mid].maxUid) {
      lo = mid + 1;
    } else {
      return findEarliestIndex(mid, uid, pBlockList, num, backward);
    }
  }
}

H
Hongze Cheng 已提交
268 269
/*
 * Starting at `index`, walk through uidList while the visited entry equals
 * `uid`. The walk goes towards higher indexes when `backward` is non-zero and
 * towards lower indexes otherwise.
 *
 * Returns the last visited index whose entry equals `uid`. If `index` itself
 * is out of range or does not match, the result is one step behind `index`.
 */
static FORCE_INLINE int32_t findEarliestRow(int32_t index, uint64_t uid, const uint64_t *uidList, int32_t num,
                                            int32_t backward) {
  const int32_t dir = backward ? 1 : -1;
  int32_t       pos = index;

  for (; pos >= 0 && pos < num; pos += dir) {
    if (uidList[pos] != uid) {
      break;
    }
  }

  return pos - dir;
}

H
Hongze Cheng 已提交
278
/*
 * Binary-search uidList (num entries; the search assumes ascending order) for
 * `uid`, then let findEarliestRow() move to the boundary row of the run of
 * equal uids for the given scan direction.
 *
 * Returns the row index, or -1 when the list is NULL or empty or does not
 * contain `uid`.
 */
static int32_t binarySearchForStartRowIndex(uint64_t *uidList, int32_t num, uint64_t uid, int32_t backward) {
  // an empty list would otherwise be indexed at position -1 below
  if (uidList == NULL || num <= 0) {
    return -1;
  }

  int32_t firstPos = 0;
  int32_t lastPos = num - 1;

  // uid lies outside the range covered by the whole list
  if ((uid > uidList[lastPos]) || (uid < uidList[firstPos])) {
    return -1;
  }

  while (1) {
    if (uid == uidList[firstPos]) {
      return findEarliestRow(firstPos, uid, uidList, num, backward);
    }

    // uid falls between two entries: it is not in the list
    if (uid > uidList[lastPos] || uid < uidList[firstPos]) {
      return -1;
    }

    int32_t numOfRows = lastPos - firstPos + 1;
    int32_t midPos = (numOfRows >> 1u) + firstPos;

    if (uid < uidList[midPos]) {
      lastPos = midPos - 1;
    } else if (uid > uidList[midPos]) {
      firstPos = midPos + 1;
    } else {
      return findEarliestRow(midPos, uid, uidList, num, backward);
    }
  }
}

309
/*
 * Stub entry point: performs no work, leaves every argument untouched and
 * always returns 0.
 */
int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t iStt, int8_t backward, uint64_t suid,
                       uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange, SSttBlockLoadInfo *pBlockLoadInfo,
                       const char *idStr, bool strictTimeRange) {
  const int32_t code = 0;
  return code;
}

315 316
/*
 * Copy the stt blocks that belong to super table `suid` from pArray into
 * pBlockLoadInfo->aSttBlk, replacing whatever the list held before.
 *
 * - Empty pArray: nothing is changed.
 * - First and last block share one suid: either all blocks qualify and are
 *   copied in one batch, or none does, in which case the list is cleared and
 *   pIter->iSttBlk is set to -1.
 * - Otherwise the blocks are scanned in order; the scan stops at the first
 *   block whose suid is greater than the target.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY when the replacement
 * list cannot be allocated (the existing list is then left untouched).
 */
static int32_t extractSttBlockInfo(SLDataIter *pIter, const TSttBlkArray *pArray, SSttBlockLoadInfo *pBlockLoadInfo,
                                   uint64_t suid) {
  if (TARRAY2_SIZE(pArray) <= 0) {
    return TSDB_CODE_SUCCESS;
  }

  SSttBlk *pStart = &pArray->data[0];
  SSttBlk *pEnd = &pArray->data[TARRAY2_SIZE(pArray) - 1];

  // all identical
  if (pStart->suid == pEnd->suid) {
    taosArrayClear(pBlockLoadInfo->aSttBlk);

    if (pStart->suid != suid) {  // no qualified stt block existed
      pIter->iSttBlk = -1;
      return TSDB_CODE_SUCCESS;
    }

    // all blocks are qualified
    taosArrayAddBatch(pBlockLoadInfo->aSttBlk, pArray->data, pArray->size);
    return TSDB_CODE_SUCCESS;
  }

  SArray *pTmp = taosArrayInit(TARRAY2_SIZE(pArray), sizeof(SSttBlk));
  if (pTmp == NULL) {
    // keep the current list; replacing it with NULL would crash later lookups
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  for (int32_t i = 0; i < TARRAY2_SIZE(pArray); ++i) {
    SSttBlk *p = &pArray->data[i];
    if (p->suid < suid) {
      continue;
    }

    if (p->suid > suid) {  // past the target super table
      break;
    }

    taosArrayPush(pTmp, p);
  }

  taosArrayDestroy(pBlockLoadInfo->aSttBlk);
  pBlockLoadInfo->aSttBlk = pTmp;

  return TSDB_CODE_SUCCESS;
}

356 357
/*
 * Three-way comparison of two uint64_t values passed by address.
 * Returns -1 when *target is smaller than *p2, 1 when it is larger and 0 when
 * both are equal.
 */
static int32_t suidComparFn(const void *target, const void *p2) {
  const uint64_t lhs = *(const uint64_t *)target;
  const uint64_t rhs = *(const uint64_t *)p2;

  if (lhs < rhs) {
    return -1;
  }

  return (lhs > rhs) ? 1 : 0;
}
365

366 367 368 369
/*
 * Use the statistics blocks of an stt file to decide whether table
 * (suid, uid) may have data in that file.
 *
 * Returns true when there are no statistics blocks at all (nothing can be
 * ruled out) or when an entry with the given suid and uid is found. Returns
 * false otherwise.
 *
 * NOTE(review): the result of tsdbSttFileReadStatisBlock() is not checked.
 */
static bool existsFromSttBlkStatis(const TStatisBlkArray *pStatisBlkArray, uint64_t suid, uint64_t uid,
                                   SSttFileReader *pReader) {
  if (TARRAY2_SIZE(pStatisBlkArray) <= 0) {
    return true;
  }

  // locate the first statistics block whose suid range covers the target
  int32_t blkIdx = 0;
  while (blkIdx < TARRAY2_SIZE(pStatisBlkArray)) {
    SStatisBlk *pBlk = &pStatisBlkArray->data[blkIdx];
    if (pBlk->minTbid.suid <= suid && pBlk->maxTbid.suid >= suid) {
      break;
    }
    ++blkIdx;
  }

  if (blkIdx >= TARRAY2_SIZE(pStatisBlkArray)) {
    return false;
  }

  for (; blkIdx < TARRAY2_SIZE(pStatisBlkArray); ++blkIdx) {
    SStatisBlk *pBlk = &pStatisBlkArray->data[blkIdx];
    if (pBlk->minTbid.suid > suid) {
      return false;
    }

    STbStatisBlock block = {0};
    tsdbSttFileReadStatisBlock(pReader, pBlk, &block);

    int32_t hit = tarray2SearchIdx(block.suid, &suid, sizeof(int64_t), suidComparFn, TD_EQ);
    if (hit == -1) {
      tStatisBlockDestroy(&block);
      return false;
    }

    bool found = false;
    if (block.uid->data[hit] == uid) {
      found = true;
    } else {
      // scan the entries of the same suid: downwards when the hit's uid is larger
      // than the target, upwards (starting after the hit) when it is smaller
      const int32_t dir = (block.uid->data[hit] > uid) ? -1 : 1;
      int32_t       j = (dir < 0) ? hit : hit + 1;

      while (j >= 0 && j < block.suid->size && block.suid->data[j] == suid) {
        if (block.uid->data[j] == uid) {
          found = true;
          break;
        }
        j += dir;
      }
    }

    tStatisBlockDestroy(&block);
    if (found) {
      return true;
    }
  }

  return false;
}

431 432 433 434
/*
 * Bind an iterator to one stt file and position it on the first stt block that
 * may contain rows of table `uid`.
 *
 * The query parameters (uid, scan direction, version range, time window) are
 * copied into pIter; the reader and load info are attached by pointer. On the
 * first use of a load info (sttBlockLoaded == false) the stt block list
 * filtered by `suid`, the statistics block array and the tomb data (through
 * loadTombFn) are loaded. Any failure in that phase is logged and returned
 * immediately.
 *
 * After a successful call, pIter->pSttBlk is NULL when there is nothing to
 * read: no reader, no matching block, or the start block lies completely
 * outside the time window. In the backward case pIter->ignoreEarlierTs is set
 * as well.
 *
 * Returns TSDB_CODE_SUCCESS or the error code of the failing load step.
 * A NULL reader is logged as an error but reported as success.
 */
int32_t tLDataIterOpen2(struct SLDataIter *pIter, SSttFileReader *pSttFileReader, int32_t iStt, int8_t backward,
                        uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange,
                        SSttBlockLoadInfo *pBlockLoadInfo, const char *idStr, bool strictTimeRange,
                        _load_tomb_fn loadTombFn, void *pReader1) {
  int32_t code = TSDB_CODE_SUCCESS;

  pIter->uid = uid;
  pIter->iStt = iStt;
  pIter->backward = backward;
  pIter->verRange.minVer = pRange->minVer;
  pIter->verRange.maxVer = pRange->maxVer;
  pIter->timeWindow.skey = pTimeWindow->skey;
  pIter->timeWindow.ekey = pTimeWindow->ekey;
  pIter->pReader = pSttFileReader;
  pIter->pBlockLoadInfo = pBlockLoadInfo;

  if (pIter->pReader == NULL) {
    tsdbError("stt file reader is null, %s", idStr);
    pIter->pSttBlk = NULL;
    pIter->iSttBlk = -1;
    return TSDB_CODE_SUCCESS;
  }

  if (!pBlockLoadInfo->sttBlockLoaded) {
    int64_t st = taosGetTimestampUs();

    const TSttBlkArray *pSttBlkArray = NULL;
    pBlockLoadInfo->sttBlockLoaded = true;

    // load the stt block info for each stt-block
    code = tsdbSttFileReadSttBlk(pIter->pReader, &pSttBlkArray);
    if (code != TSDB_CODE_SUCCESS) {
      tsdbError("load stt blk failed, code:%s, %s", tstrerror(code), idStr);
      return code;
    }

    code = extractSttBlockInfo(pIter, pSttBlkArray, pBlockLoadInfo, suid);
    if (code != TSDB_CODE_SUCCESS) {
      tsdbError("load stt block info failed, code:%s, %s", tstrerror(code), idStr);
      return code;
    }

    // load stt blocks statis for all stt-blocks, to decide if the data of queried table exists in current stt file
    code = tsdbSttFileReadStatisBlk(pIter->pReader, (const TStatisBlkArray **)&pBlockLoadInfo->pSttStatisBlkArray);
    if (code != TSDB_CODE_SUCCESS) {
      tsdbError("failed to load stt block statistics, code:%s, %s", tstrerror(code), idStr);
      return code;
    }

    // stop here on failure instead of reporting completion and positioning the
    // iterator with incomplete tomb data
    code = loadTombFn(pReader1, pIter->pReader, pIter->pBlockLoadInfo);
    if (code != TSDB_CODE_SUCCESS) {
      tsdbError("failed to load tomb data, code:%s, %s", tstrerror(code), idStr);
      return code;
    }

    double el = (taosGetTimestampUs() - st) / 1000.0;
    tsdbDebug("load the stt file info completed, elapsed time:%.2fms, %s", el, idStr);
  }

  // NOTE: the existsFromSttBlkStatis() pre-check is not applied here; the call is disabled.

  // find the start block, actually we could load the position to avoid repeatedly searching for the start position
  // when the skey is updated.
  size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk);
  pIter->iSttBlk = binarySearchForStartBlock(pBlockLoadInfo->aSttBlk->pData, size, uid, backward);
  if (pIter->iSttBlk != -1) {
    pIter->pSttBlk = taosArrayGet(pBlockLoadInfo->aSttBlk, pIter->iSttBlk);
    pIter->iRow = (pIter->backward) ? pIter->pSttBlk->nRow : -1;

    // drop the start block when it cannot overlap the time window; with
    // strictTimeRange a block that only touches the window boundary is dropped too
    if (!backward) {
      if ((strictTimeRange && pIter->pSttBlk->minKey >= pIter->timeWindow.ekey) ||
          (!strictTimeRange && pIter->pSttBlk->minKey > pIter->timeWindow.ekey)) {
        pIter->pSttBlk = NULL;
      }
    } else {
      if ((strictTimeRange && pIter->pSttBlk->maxKey <= pIter->timeWindow.skey) ||
          (!strictTimeRange && pIter->pSttBlk->maxKey < pIter->timeWindow.skey)) {
        pIter->pSttBlk = NULL;
        pIter->ignoreEarlierTs = true;
      }
    }
  }

  return code;
}

H
Haojun Liao 已提交
516 517
/*
 * Close the stt file reader attached to the iterator and clear the iterator's
 * reader pointer. Nothing else owned by the iterator is released here.
 */
void tLDataIterClose2(SLDataIter *pIter) {
  SSttFileReader **ppReader = &pIter->pReader;

  tsdbSttFileReaderClose(ppReader);
  *ppReader = NULL;
}
H
Hongze Cheng 已提交
520

H
Hongze Cheng 已提交
521
/*
 * Move the iterator to the next stt block (in scan direction) that can hold
 * qualifying rows: its uid range must contain pIter->uid and its key and
 * version ranges must overlap the iterator's time window and version range.
 *
 * The scan gives up early as soon as a block shows that nothing further along
 * can match. On success pIter->iSttBlk and pIter->pSttBlk refer to the block
 * found. Otherwise pIter->pSttBlk is NULL and pIter->iSttBlk is left one step
 * past the previous block.
 */
void tLDataIterNextBlock(SLDataIter *pIter, const char *idStr) {
  const int32_t dir = pIter->backward ? -1 : 1;
  const int32_t prevIdx = pIter->iSttBlk;

  pIter->iSttBlk += dir;

  int32_t found = -1;
  size_t  total = pIter->pBlockLoadInfo->aSttBlk->size;
  for (int32_t i = pIter->iSttBlk; i < total && i >= 0; i += dir) {
    SSttBlk *pBlk = taosArrayGet(pIter->pBlockLoadInfo->aSttBlk, i);

    // uid: blocks further along cannot contain the table any more
    if (pIter->backward ? (pBlk->maxUid < pIter->uid) : (pBlk->minUid > pIter->uid)) {
      break;
    }
    if (pBlk->minUid > pIter->uid || pBlk->maxUid < pIter->uid) {
      continue;
    }

    // time range
    if (pIter->backward ? (pBlk->maxKey < pIter->timeWindow.skey) : (pBlk->minKey > pIter->timeWindow.ekey)) {
      break;
    }
    if (pBlk->minKey > pIter->timeWindow.ekey || pBlk->maxKey < pIter->timeWindow.skey) {
      continue;
    }

    // version range
    if (pIter->backward ? (pBlk->maxVer < pIter->verRange.minVer) : (pBlk->minVer > pIter->verRange.maxVer)) {
      break;
    }
    if (pBlk->minVer <= pIter->verRange.maxVer && pBlk->maxVer >= pIter->verRange.minVer) {
      found = i;
      break;
    }
  }

  if (found == -1) {
    pIter->pSttBlk = NULL;
    tsdbDebug("no more last block qualified, uid:%" PRIu64 ", file index:%d, %s", pIter->uid, prevIdx, idStr);
    return;
  }

  pIter->iSttBlk = found;
  pIter->pSttBlk = (SSttBlk *)taosArrayGet(pIter->pBlockLoadInfo->aSttBlk, pIter->iSttBlk);
  tsdbDebug("try next last file block:%d from stt fileIdx:%d, trigger by uid:%" PRIu64 ", file index:%d, %s",
            pIter->iSttBlk, prevIdx, pIter->uid, pIter->iStt, idStr);
}

H
Hongze Cheng 已提交
578 579
/*
 * Starting at pIter->iRow, find the next row of the current block (in scan
 * direction) that belongs to pIter->uid and lies inside the iterator's time
 * window and version range.
 *
 * On return pIter->iRow is the index of that row, or -1 when the block holds
 * no further qualifying row or the block data could not be loaded.
 */
static void findNextValidRow(SLDataIter *pIter, const char *idStr) {
  bool    hasVal = false;
  int32_t step = pIter->backward ? -1 : 1;
  int32_t i = pIter->iRow;

  SBlockData *pData = loadLastBlock(pIter, idStr);
  if (pData == NULL) {
    // nothing to scan; loadLastBlock() has set terrno if this was a read failure
    pIter->iRow = -1;
    return;
  }

  // mostly we only need to find the start position for a given table
  if ((((i == 0) && (!pIter->backward)) || (i == pData->nRow - 1 && pIter->backward)) && pData->aUid != NULL) {
    i = binarySearchForStartRowIndex((uint64_t *)pData->aUid, pData->nRow, pIter->uid, pIter->backward);
    if (i == -1) {
      tsdbDebug("failed to find the data in pBlockData, uid:%" PRIu64 " , %s", pIter->uid, idStr);
      pIter->iRow = -1;
      return;
    }
  }

  for (; i < pData->nRow && i >= 0; i += step) {
    // stop once the scan has left the rows of the wanted table
    if (pData->aUid != NULL) {
      if (!pIter->backward) {
        if (pData->aUid[i] > pIter->uid) {
          break;
        }
      } else {
        if (pData->aUid[i] < pIter->uid) {
          break;
        }
      }
    }

    int64_t ts = pData->aTSKEY[i];
    if (!pIter->backward) {               // asc
      if (ts > pIter->timeWindow.ekey) {  // no more data
        break;
      } else if (ts < pIter->timeWindow.skey) {
        continue;
      }
    } else {
      if (ts < pIter->timeWindow.skey) {
        break;
      } else if (ts > pIter->timeWindow.ekey) {
        continue;
      }
    }

    int64_t ver = pData->aVersion[i];
    if (ver < pIter->verRange.minVer) {
      continue;
    }

    // todo opt handle desc case
    if (ver > pIter->verRange.maxVer) {
      continue;
    }

    hasVal = true;
    break;
  }

  pIter->iRow = (hasVal) ? i : -1;
}

H
Hongze Cheng 已提交
640
bool tLDataIterNextRow(SLDataIter *pIter, const char *idStr) {
H
Hongze Cheng 已提交
641
  int32_t step = pIter->backward ? -1 : 1;
H
Haojun Liao 已提交
642
  terrno = TSDB_CODE_SUCCESS;
643 644

  // no qualified last file block in current file, no need to fetch row
H
Hongze Cheng 已提交
645
  if (pIter->pSttBlk == NULL) {
646 647
    return false;
  }
H
Hongze Cheng 已提交
648

H
Hongze Cheng 已提交
649
  int32_t     iBlockL = pIter->iSttBlk;
H
Haojun Liao 已提交
650
  SBlockData *pBlockData = loadLastBlock(pIter, idStr);
H
Haojun Liao 已提交
651
  if (pBlockData == NULL || terrno != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
652 653 654
    goto _exit;
  }

655 656
  pIter->iRow += step;

H
Hongze Cheng 已提交
657
  while (1) {
658
    bool skipBlock = false;
H
Haojun Liao 已提交
659
    findNextValidRow(pIter, idStr);
660

661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
    if (pIter->pBlockLoadInfo->checkRemainingRow) {
      skipBlock = true;
      int16_t *aCols = pIter->pBlockLoadInfo->colIds;
      int      nCols = pIter->pBlockLoadInfo->numOfCols;
      bool     isLast = pIter->pBlockLoadInfo->isLast;
      for (int inputColIndex = 0; inputColIndex < nCols; ++inputColIndex) {
        for (int colIndex = 0; colIndex < pBlockData->nColData; ++colIndex) {
          SColData *pColData = &pBlockData->aColData[colIndex];
          int16_t   cid = pColData->cid;

          if (cid == aCols[inputColIndex]) {
            if (isLast && (pColData->flag & HAS_VALUE)) {
              skipBlock = false;
              break;
            } else if (pColData->flag & (HAS_VALUE | HAS_NULL)) {
              skipBlock = false;
              break;
            }
          }
        }
      }
    }

    if (skipBlock || pIter->iRow >= pBlockData->nRow || pIter->iRow < 0) {
685
      tLDataIterNextBlock(pIter, idStr);
H
Hongze Cheng 已提交
686
      if (pIter->pSttBlk == NULL) {  // no more data
687 688 689 690
        goto _exit;
      }
    } else {
      break;
H
Hongze Cheng 已提交
691 692
    }

H
Hongze Cheng 已提交
693
    if (iBlockL != pIter->iSttBlk) {
H
Haojun Liao 已提交
694
      pBlockData = loadLastBlock(pIter, idStr);
695 696 697
      if (pBlockData == NULL) {
        goto _exit;
      }
H
Haojun Liao 已提交
698 699

      // set start row index
700
      pIter->iRow = pIter->backward ? pBlockData->nRow - 1 : 0;
H
Hongze Cheng 已提交
701 702 703
    }
  }

704 705 706
  pIter->rInfo.suid = pBlockData->suid;
  pIter->rInfo.uid = pBlockData->uid;
  pIter->rInfo.row = tsdbRowFromBlockData(pBlockData, pIter->iRow);
H
Hongze Cheng 已提交
707 708

_exit:
709
  return (terrno == TSDB_CODE_SUCCESS) && (pIter->pSttBlk != NULL) && (pBlockData != NULL);
H
Hongze Cheng 已提交
710 711
}

H
Hongze Cheng 已提交
712
/* Return a pointer to the row info stored inside the iterator. */
SRowInfo *tLDataIterGet(SLDataIter *pIter) {
  SRowInfo *pRowInfo = &pIter->rInfo;
  return pRowInfo;
}
H
Hongze Cheng 已提交
713 714

// SMergeTree =================================================
H
Hongze Cheng 已提交
715 716 717
/*
 * Red-black tree comparator for ascending scans. Each node is embedded in an
 * SLDataIter; the iterators are ordered by the key of their current row:
 * timestamp first, version second. Returns -1, 0 or 1.
 */
static FORCE_INLINE int32_t tLDataIterCmprFn(const SRBTreeNode *p1, const SRBTreeNode *p2) {
  // recover the owning iterators from the embedded tree nodes
  SLDataIter *pLhs = (SLDataIter *)(((uint8_t *)p1) - offsetof(SLDataIter, node));
  SLDataIter *pRhs = (SLDataIter *)(((uint8_t *)p2) - offsetof(SLDataIter, node));

  TSDBKEY lhs = TSDBROW_KEY(&pLhs->rInfo.row);
  TSDBKEY rhs = TSDBROW_KEY(&pRhs->rInfo.row);

  if (lhs.ts != rhs.ts) {
    return (lhs.ts < rhs.ts) ? -1 : 1;
  }

  if (lhs.version != rhs.version) {
    return (lhs.version < rhs.version) ? -1 : 1;
  }

  return 0;
}

737 738 739 740
/* Red-black tree comparator for descending scans: the ascending order, inverted. */
static FORCE_INLINE int32_t tLDataIterDescCmprFn(const SRBTreeNode *p1, const SRBTreeNode *p2) {
  const int32_t asc = tLDataIterCmprFn(p1, p2);
  return -asc;
}

741
int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid,
742
                       STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo,
743
                       bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter) {
744 745
  int32_t code = TSDB_CODE_SUCCESS;

H
Hongze Cheng 已提交
746
  pMTree->backward = backward;
747
  pMTree->pIter = NULL;
H
Haojun Liao 已提交
748
  pMTree->idStr = idStr;
749

dengyihao's avatar
dengyihao 已提交
750
  if (!pMTree->backward) {  // asc
751
    tRBTreeCreate(&pMTree->rbt, tLDataIterCmprFn);
dengyihao's avatar
dengyihao 已提交
752
  } else {  // desc
753 754
    tRBTreeCreate(&pMTree->rbt, tLDataIterDescCmprFn);
  }
755

756 757
  pMTree->pLoadInfo = pBlockLoadInfo;
  pMTree->destroyLoadInfo = destroyLoadInfo;
758
  pMTree->ignoreEarlierTs = false;
759

H
Hongze Cheng 已提交
760
  for (int32_t i = 0; i < pFReader->pSet->nSttF; ++i) {  // open all last file
761 762
    memset(&pLDataIter[i], 0, sizeof(SLDataIter));
    code = tLDataIterOpen(&pLDataIter[i], pFReader, i, pMTree->backward, suid, uid, pTimeWindow, pVerRange,
763
                          &pMTree->pLoadInfo[i], pMTree->idStr, strictTimeRange);
764 765 766 767
    if (code != TSDB_CODE_SUCCESS) {
      goto _end;
    }

768
    bool hasVal = tLDataIterNextRow(&pLDataIter[i], pMTree->idStr);
769
    if (hasVal) {
770
      tMergeTreeAddIter(pMTree, &pLDataIter[i]);
771
    } else {
772
      if (!pMTree->ignoreEarlierTs) {
773
        pMTree->ignoreEarlierTs = pLDataIter[i].ignoreEarlierTs;
774
      }
775 776
    }
  }
777 778 779

  return code;

H
Hongze Cheng 已提交
780
_end:
781 782
  tMergeTreeClose(pMTree);
  return code;
H
Hongze Cheng 已提交
783
}
H
Hongze Cheng 已提交
784

785
/*
 * Initialize a merge tree over all stt files of the current file set described
 * by pConf.
 *
 * pConf->pSttFileBlockIterArray caches one list of `SLDataIter *` per stt
 * level. The lists are grown or shrunk to match the number of files on each
 * level, and file readers and load infos are created on first use and reused
 * afterwards. Every iterator is then (re)opened for the query in pConf and
 * advanced to its first row. Iterators that have a row are added to the tree;
 * for the others the ignoreEarlierTs flag is carried over to the tree.
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_OUT_OF_MEMORY when an iterator, list or
 * load info cannot be allocated, or the error of a failing iterator open. The
 * tree is closed before returning in the error cases and when the file set has
 * no stt level. Iterators created so far stay in pSttFileBlockIterArray.
 */
int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) {
  int32_t code = TSDB_CODE_SUCCESS;

  pMTree->pIter = NULL;
  pMTree->backward = pConf->backward;
  pMTree->idStr = pConf->idstr;

  if (!pMTree->backward) {  // asc
    tRBTreeCreate(&pMTree->rbt, tLDataIterCmprFn);
  } else {  // desc
    tRBTreeCreate(&pMTree->rbt, tLDataIterDescCmprFn);
  }

  pMTree->ignoreEarlierTs = false;

  int32_t size = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->size;
  if (size == 0) {
    goto _end;
  }

  // add the list/iter placeholder
  while (taosArrayGetSize(pConf->pSttFileBlockIterArray) < size) {
    SArray *pList = taosArrayInit(4, POINTER_BYTES);
    if (pList == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _end;
    }
    taosArrayPush(pConf->pSttFileBlockIterArray, &pList);
  }

  for (int32_t j = 0; j < size; ++j) {
    SSttLvl *pSttLevel = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->data[j];
    ASSERT(pSttLevel->level == j);

    SArray *pList = taosArrayGetP(pConf->pSttFileBlockIterArray, j);
    int32_t numOfIter = taosArrayGetSize(pList);

    if (numOfIter < TARRAY2_SIZE(pSttLevel->fobjArr)) {
      int32_t inc = TARRAY2_SIZE(pSttLevel->fobjArr) - numOfIter;
      for (int32_t k = 0; k < inc; ++k) {
        SLDataIter *pIter = taosMemoryCalloc(1, sizeof(SLDataIter));
        if (pIter == NULL) {
          // never store a NULL iterator: it would be dereferenced below
          code = TSDB_CODE_OUT_OF_MEMORY;
          goto _end;
        }
        taosArrayPush(pList, &pIter);
      }
    } else if (numOfIter > TARRAY2_SIZE(pSttLevel->fobjArr)) {
      int32_t inc = numOfIter - TARRAY2_SIZE(pSttLevel->fobjArr);
      for (int i = 0; i < inc; ++i) {
        // NOTE(review): this treats the value returned by taosArrayPop() as the
        // stored SLDataIter pointer itself — confirm it does not return the
        // address of the popped slot.
        SLDataIter *pIter = taosArrayPop(pList);
        destroyLDataIter(pIter);
      }
    }

    for (int32_t i = 0; i < TARRAY2_SIZE(pSttLevel->fobjArr); ++i) {  // open all last file
      SLDataIter *pIter = taosArrayGetP(pList, i);

      // both survive the memset below and are re-attached by tLDataIterOpen2()
      SSttFileReader    *pSttFileReader = pIter->pReader;
      SSttBlockLoadInfo *pLoadInfo = pIter->pBlockLoadInfo;

      // open stt file reader if not
      if (pSttFileReader == NULL) {
        SSttFileReaderConfig conf = {.tsdb = pConf->pTsdb, .szPage = pConf->pTsdb->pVnode->config.tsdbPageSize};
        conf.file[0] = *pSttLevel->fobjArr->data[i]->f;

        // a failed open is only logged: tLDataIterOpen2() treats a NULL reader as "no data"
        code = tsdbSttFileReaderOpen(pSttLevel->fobjArr->data[i]->fname, &conf, &pSttFileReader);
        if (code != TSDB_CODE_SUCCESS) {
          tsdbError("open stt file reader error. file name %s, code %s, %s", pSttLevel->fobjArr->data[i]->fname,
                    tstrerror(code), pMTree->idStr);
        }
      }

      if (pLoadInfo == NULL) {
        pLoadInfo = tCreateOneLastBlockLoadInfo(pConf->pSchema, pConf->pCols, pConf->numOfCols);
        if (pLoadInfo == NULL) {
          // keep a freshly opened reader attached so it is closed with the iterator
          pIter->pReader = pSttFileReader;
          code = TSDB_CODE_OUT_OF_MEMORY;
          goto _end;
        }
      }

      memset(pIter, 0, sizeof(SLDataIter));
      code = tLDataIterOpen2(pIter, pSttFileReader, i, pMTree->backward, pConf->suid, pConf->uid, &pConf->timewindow,
                             &pConf->verRange, pLoadInfo, pMTree->idStr, pConf->strictTimeRange, pConf->loadTombFn,
                             pConf->pReader);
      if (code != TSDB_CODE_SUCCESS) {
        goto _end;
      }

      bool hasVal = tLDataIterNextRow(pIter, pMTree->idStr);
      if (hasVal) {
        tMergeTreeAddIter(pMTree, pIter);
      } else {
        if (!pMTree->ignoreEarlierTs) {
          pMTree->ignoreEarlierTs = pIter->ignoreEarlierTs;
        }
      }
    }
  }

  return code;

_end:
  tMergeTreeClose(pMTree);
  return code;
}

880 881
/* Insert the iterator into the merge tree's red-black tree. */
void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter) {
  SRBTreeNode *pNode = (SRBTreeNode *)pIter;
  tRBTreePut(&pMTree->rbt, pNode);
}

882 883
/* Report the merge tree's ignoreEarlierTs flag. */
bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree) {
  const bool ignore = pMTree->ignoreEarlierTs;
  return ignore;
}

H
Hongze Cheng 已提交
884
/*
 * Advance the merge tree to its next row.
 *
 * The iterator that produced the previous row (pMTree->pIter, held outside the
 * tree) is advanced first. If it is exhausted it is dropped. If its new row
 * orders after the tree's minimum it is put back into the tree. After that,
 * unless the current iterator is still the smallest, the tree's minimum is
 * removed from the tree and becomes the current iterator.
 *
 * Returns true while a current row exists.
 */
bool tMergeTreeNext(SMergeTree *pMTree) {
  if (pMTree->pIter != NULL) {
    if (!tLDataIterNextRow(pMTree->pIter, pMTree->idStr)) {
      pMTree->pIter = NULL;
    }

    // compare with min in RB Tree
    SLDataIter *pMin = (SLDataIter *)tRBTreeMin(&pMTree->rbt);
    if (pMTree->pIter != NULL && pMin != NULL) {
      int32_t c = pMTree->rbt.cmprFn(&pMTree->pIter->node, &pMin->node);
      if (c > 0) {
        tRBTreePut(&pMTree->rbt, (SRBTreeNode *)pMTree->pIter);
        pMTree->pIter = NULL;
      } else {
        ASSERT(c);
      }
    }
  }

  if (pMTree->pIter == NULL) {
    pMTree->pIter = (SLDataIter *)tRBTreeMin(&pMTree->rbt);
    if (pMTree->pIter != NULL) {
      tRBTreeDrop(&pMTree->rbt, (SRBTreeNode *)pMTree->pIter);
    }
  }

  return pMTree->pIter != NULL;
}

H
Hongze Cheng 已提交
917
/*
 * Close the merge tree: forget the current iterator and, if the tree is
 * flagged as owning its load info, destroy it and clear the flag.
 */
void tMergeTreeClose(SMergeTree *pMTree) {
  pMTree->pIter = NULL;

  if (!pMTree->destroyLoadInfo) {
    return;
  }

  pMTree->pLoadInfo = destroyLastBlockLoadInfo(pMTree->pLoadInfo);
  pMTree->destroyLoadInfo = false;
}